This file is used to prepare the figures for the paper.

library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
library(org.Mm.eg.db)

.libPaths()
## [1] "/usr/local/lib/R/library"

Preparation

Here are the folders where analyzes are stored :

data_dir = "./.."
list.files(data_dir)
##  [1] "0_intro"      "1_metadata"   "2_individual" "3_combined"   "4_zoom"      
##  [6] "5_wu"         "6_figures"    "LICENSE"      "index.html"   "index_layout"

We load the dataset containing all cells :

sobj = readRDS(paste0(data_dir, "/3_combined/hs_hd_sobj.rds"))
sobj
## An object of class Seurat 
## 20003 features across 12111 samples within 1 assay 
## Active assay: RNA (20003 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne

These are all the samples analyzed :

sample_info = readRDS(paste0(data_dir, "/1_metadata/hs_hd_sample_info.rds"))

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))

These are the custom colors for cell populations :

color_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_color_markers.rds"))
color_markers = color_markers[names(color_markers) != "melanocytes"]
ors_color = color_markers["ORS"]
color_markers["ORS"] = color_markers["IFE"] 
color_markers["IFE"] = ors_color
color_markers["B cells"] = "chocolate3"
rm(ors_color)

# re-order
color_markers = color_markers[c("CD4 T cells", "CD8 T cells", "Langerhans cells", "macrophages", "B cells",
                                "cuticle", "cortex", "medulla", "IRS", "proliferative",
                                "HFSC", "ORS", "IBL", "IFE", "sebocytes")]

data.frame(cell_type = names(color_markers),
           color = unlist(color_markers)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank(),
                 axis.text.x = element_text(hjust = 1, angle = 20))

We define custom colors for sample type :

sample_type_colors = setNames(nm = levels(sample_info$sample_type),
                              c("#C55F40", "#2C78E6"))

data.frame(cell_type = names(sample_type_colors),
           color = unlist(sample_type_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(sample_type_colors), breaks = names(sample_type_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())

We set a background color :

bg_color = "gray94"

This is the correspondence between cell types and cell families, and custom colors to color cells by cell family :

custom_order_cell_type = data.frame(
  cell_type = names(color_markers),
  cell_family = c(rep("immune cells", 5),
                  rep("matrix", 5),
                  rep("non matrix", 5)),
  stringsAsFactors = FALSE)
custom_order_cell_type$cell_type = factor(custom_order_cell_type$cell_type,
                                          levels = custom_order_cell_type$cell_type)
rownames(custom_order_cell_type) = custom_order_cell_type$cell_type

family_color = c("immune cells" = "slateblue1",
                 "matrix" = "mediumseagreen",
                 "non matrix" = "firebrick3")

We load markers to display on a dotplot to assess cell type annotation :

dotplot_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = dotplot_markers[names(dotplot_markers) != "melanocytes"]
lengths(dotplot_markers)
##      CD4 T cells      CD8 T cells Langerhans cells      macrophages 
##                2                2                2                2 
##          B cells          cuticle           cortex          medulla 
##                2                2                2                2 
##              IRS    proliferative              IBL              ORS 
##                2                2                2                2 
##              IFE             HFSC        sebocytes 
##                2                2                2

Custom functions to display gene expression on the heatmap :

color_fun = function(one_gene) {
  gene_range = range(ht_annot[, one_gene])
  gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
                                      breaks = seq(from = gene_range[1], to = gene_range[2],
                                                   length.out = length(aquarius::color_gene)))
  return(gene_palette)
}

All samples

Settings

This is the projection name to visualize cells :

name2D = "harmony_38_tsne"
name2D_atlas = name2D

Preparation

We make a low resolutive clustering for the heatmap :

sobj = Seurat::FindClusters(sobj, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12111
## Number of edges: 475472
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9421
## Number of communities: 17
## Elapsed time: 1 seconds
length(levels(sobj$seurat_clusters))
## [1] 17

We define cluster type and cluster family :

sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))
sobj$cluster_family = custom_order_cell_type[sobj$cluster_type, "cell_family"]
sobj$cluster_family = factor(sobj$cluster_family,
                             levels = names(family_color))

Figures

Project name :

# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

Sample type :

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_type = sobj$sample_type
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_type)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_type_colors,
                              breaks = names(sample_type_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

Cluster :

grey_palette = setNames(nm = levels(sobj$seurat_clusters),
                        rep("#D9D9D9", length(levels(sobj$seurat_clusters))))
grey_palette[c("7", "16", "1", "12", "11", "10", "15")] = "#BDBDBD"
grey_palette[c("16", "14", "5", "9")] = "#969696"

Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.4,
                group.by = "seurat_clusters", cols = grey_palette,
                label = TRUE, label.size = 6) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Cell type annotation :

Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Cluster family annotation :

Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_family", cols = family_color) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Cell type annotation split by condition :

plot_list = aquarius::plot_split_dimred(sobj, reduction = name2D,
                                        group_by = "cell_type",
                                        group_color = color_markers,
                                        split_by = "sample_type",
                                        split_color = sample_type_colors,
                                        bg_color = bg_color)

patchwork::wrap_plots(plot_list) &
  Seurat::NoLegend()

Gene expression to assess annotation :

genes = c("PTPRC", "MSX2", "KRT14")
names(genes) = c("immune cells", "matrix cells", "non-matrix cells")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  pop = names(genes)[gene_id]
  
  sobj$my_gene = Seurat::FetchData(sobj, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj, features = "my_gene", reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) + 
    # subtitle = pop) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

Barplot by cluster family :

quantif = table(sobj$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

aquarius::plot_barplot(df = table(sobj$sample_identifier,
                                  sobj$cluster_family) %>%
                         as.data.frame.table() %>%
                         `colnames<-`(c("Sample", "Cell Type", "Number")),
                       x = "Sample", y = "Number", fill = "Cell Type",
                       position = position_fill()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
                      label.size = 0, size = 4) +
  ggplot2::scale_fill_manual(values = unlist(family_color),
                             breaks = names(family_color),
                             name = "Cell Family") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 axis.text.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0)),
                 legend.position = "none")

Heatmap of cluster proportion by sample :

group_by = "seurat_clusters"

cluster_by_sample = table(sobj$sample_identifier,
                          sobj@meta.data[, group_by]) %>%
  prop.table(margin = 1) %>%
  as.matrix()

## Right annotation : number of cells by dataset
ht_annot = table(sobj$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  `rownames<-`(.$sample_identifier) %>%
  dplyr::select(-sample_identifier)

ha_right = ComplexHeatmap::HeatmapAnnotation(
  df = ht_annot,
  which = "row",
  show_legend = TRUE,
  annotation_name_side = "top",
  col = list(nb_cells  = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
                                              breaks = seq(from = range(ht_annot$nb_cells)[1],
                                                           to = range(ht_annot$nb_cells)[2],
                                                           length.out = 9))))

## Left annotation : gender
ha_left = ComplexHeatmap::HeatmapAnnotation(
  gender = sample_info$gender,
  which = "row",
  show_legend = TRUE,
  annotation_name_side = "top",
  col = list(gender = setNames(nm = c("F", "M"),
                               c("lightcyan3", "navyblue"))))

## Top annotation : main cell type in this cluster
ht_annot = table(sobj$sample_identifier,
                 sobj@meta.data[, group_by]) %>%
  prop.table(margin = 1) %>%
  as.matrix()

ht_annot = table(sobj$cell_type,
                 sobj@meta.data[, group_by]) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
ht_annot = data.frame(row.names = names(ht_annot),
                      cell_type = names(color_markers)[ht_annot],
                      stringsAsFactors = FALSE)
ht_annot = dplyr::left_join(ht_annot, custom_order_cell_type, by = "cell_type") %>%
  # Simplification for matrix
  dplyr::mutate(cell_type = ifelse(cell_type %in% c("medulla", "cortex", "cuticle"), yes = "hair shaft", no = cell_type)) %>%
  # Simplification for T cells
  dplyr::mutate(cell_type = ifelse(cell_type %in% c("CD4 T cells", "CD8 T cells"), yes = "T cells", no = cell_type)) %>%
  # Simplification for APC
  dplyr::mutate(cell_type = ifelse(cell_type %in% c("Langerhans cells", "macrophages"), yes = "APC", no = cell_type)) %>%
  # Add color
  dplyr::mutate(color = as.character(color_markers[cell_type])) %>%
  dplyr::mutate(color = ifelse(cell_type == "hair shaft", yes = "#FFB6C1", no = color)) %>%
  dplyr::mutate(color = ifelse(cell_type == "T cells", yes = "#8A6EE6", no = color)) %>%
  dplyr::mutate(color = ifelse(cell_type == "APC", yes = "#9CAA4B", no = color))

ha_top = ComplexHeatmap::HeatmapAnnotation(
  # cell_type = ht_annot$cell_type,
  cell_family = ht_annot$cell_family,
  which = "column",
  show_legend = TRUE,
  show_annotation_name = FALSE,
  # annotation_name_side = "left",
  col = list(#cell_type = setNames(nm = ht_annot$cell_type,
    #                      ht_annot$color),
    cell_family = family_color
  ))

## Assemble heatmap
ht = ComplexHeatmap::Heatmap(cluster_by_sample,
                             heatmap_legend_param = list(title = "Proportion",
                                                         col = c("#2166AC", "#F7F7F7", "#B2182B")),
                             # bottom_annotation = ha_bottom,
                             right_annotation = ha_right,
                             left_annotation = ha_left,
                             top_annotation = ha_top,
                             cluster_rows = TRUE,
                             cluster_columns = TRUE,
                             row_title = "Sample",
                             row_names_gp = grid::gpar(names = sample_info$sample_identifier,
                                                       col = sample_info$color,
                                                       fontface = "bold"),
                             column_title = "Cluster",
                             column_names_centered = TRUE,
                             row_names_side = "left",
                             column_names_side = "top",
                             column_names_rot = 0)

## Draw !
ComplexHeatmap::draw(ht, merge_legends = TRUE)

For the dotplot, we clarify clusters and cell type annotation :

cell_type_in_cluster = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 1) %>%
  apply(., 1, which.max)
cell_type_in_cluster = cell_type_in_cluster - 1

missing_cluster = setdiff(levels(sobj$seurat_clusters),
                          cell_type_in_cluster)

cell_type_in_cluster = data.frame(cell_type = c(names(cell_type_in_cluster), cluster_type[missing_cluster]),
                                  cluster_id = c(cell_type_in_cluster, names(cluster_type[missing_cluster])),
                                  stringsAsFactors = FALSE, row.names = NULL) %>%
  dplyr::mutate(cluster_id = as.numeric(cluster_id)) %>%
  dplyr::arrange(cell_type, cluster_id)

custom_order_cell_type$clusters = custom_order_cell_type %>%
  apply(., MARGIN = 1, FUN = function(one_row) {
    cell_type = one_row["cell_type"]
    clusters = cell_type_in_cluster %>%
      dplyr::filter(.data$cell_type == .env$cell_type) %>%
      dplyr::pull(cluster_id)
    
    cell_type_cluster = paste0(cell_type, " (", paste0(clusters, collapse = ", "), ")")
    
    return(cell_type_cluster)
  }) %>%
  factor(., levels = .)

custom_order_cell_type
##                         cell_type  cell_family                     clusters
## CD4 T cells           CD4 T cells immune cells              CD4 T cells (2)
## CD8 T cells           CD8 T cells immune cells          CD8 T cells (2, 14)
## Langerhans cells Langerhans cells immune cells         Langerhans cells (6)
## macrophages           macrophages immune cells              macrophages (6)
## B cells                   B cells immune cells                 B cells (16)
## cuticle                   cuticle       matrix                  cuticle (3)
## cortex                     cortex       matrix                   cortex (3)
## medulla                   medulla       matrix                 medulla (11)
## IRS                           IRS       matrix                     IRS (15)
## proliferative       proliferative       matrix proliferative (4, 9, 10, 13)
## HFSC                         HFSC   non matrix                  HFSC (7, 8)
## ORS                           ORS   non matrix                      ORS (0)
## IBL                           IBL   non matrix                      IBL (1)
## IFE                           IFE   non matrix                      IFE (5)
## sebocytes               sebocytes   non matrix               sebocytes (12)

Dotplot :

plot_list = aquarius::plot_dotplot(sobj,
                                   markers = c("PTPRC",
                                               "CD3E", "CD4",
                                               "CD3E", "CD8A",
                                               "CD207", "AIF1",
                                               "TREM2", "MSR1",
                                               "CD79A", "CD79B",
                                               # "PRDM1", "KRT85",
                                               "MSX2",
                                               "KRT32", "KRT35",
                                               "KRT31", "PRR9",
                                               "BAMBI", "ALDH1A3",
                                               "KRT71", "KRT73",
                                               "TOP2A", "MCM5",
                                               "KRT14", "CXCL14",
                                               "KRT15", "COL17A1",
                                               "DIO2", "TCEAL2",
                                               "KRT16", "KRT6C",
                                               "SPINK5", "LY6D",
                                               "CLMP", "PPARG"),
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = ggplot2::ggplot(custom_order_cell_type, aes(x = clusters, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = family_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = family_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = family_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list

Immune cells

Settings

We load the immune cells dataset :

sobj_ic = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_sobj.rds"))
sobj_ic
## An object of class Seurat 
## 15121 features across 2329 samples within 1 assay 
## Active assay: RNA (15121 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne

This is the projection name to visualize cells :

name2D = "harmony_20_tsne"

To represent results from differential expression, we load the analyses results :

list_results = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_list_results.rds"))

lapply(list_results, FUN = names)
## $`Langerhans cells`
## [1] "mark" "gsea"
## 
## $macrophages
## [1] "mark"       "enrichr_hs" "enrichr_hd" "gsea"      
## 
## $`CD4 T cells`
## [1] "mark"       "enrichr_hs" "enrichr_hd" "gsea"      
## 
## $`CD8 T cells`
## [1] "mark"       "enrichr_hs" "enrichr_hd" "gsea"

Preparation

We defined cluster type and cluster family :

cluster_type = table(sobj_ic$cell_type, sobj_ic$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj_ic$cell_type)[cluster_type])

sobj_ic$cluster_type = cluster_type[sobj_ic$seurat_clusters]
sobj_ic$cluster_type = factor(sobj_ic$cluster_type,
                              levels = levels(sobj_ic$cell_type))

Figures

Control cells on the full atlas :

sobj$is_immune = (colnames(sobj) %in% colnames(sobj_ic))

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_immune", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(family_color[["immune cells"]], bg_color),
                              breaks = c(TRUE, FALSE)) +
  ggplot2::labs(title = "Immune cells",
                subtitle = paste0(ncol(sobj_ic), " cells")) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_text(hjust = 0.5, face = "bold"),
                 plot.subtitle = element_text(hjust = 0.5)) +
  Seurat::NoAxes() + Seurat::NoLegend()

Cluster type :

Seurat::DimPlot(sobj_ic, reduction = name2D, pt.size = 1,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Violin plot of IL1B in macrophages :

subsobj = subset(sobj_ic, seurat_clusters == 2)
table(subsobj$sample_type)
## 
##  HS  HD 
## 378  31
il1b_hs = subsobj@assays$RNA@data["IL1B", subsobj$sample_type == "HS"]
il1b_hd = subsobj@assays$RNA@data["IL1B", subsobj$sample_type == "HD"]
il1b_hs_VS_il1b_hd = stats::t.test(il1b_hs, il1b_hd)
il1b_hs_VS_il1b_hd
## 
##  Welch Two Sample t-test
## 
## data:  il1b_hs and il1b_hd
## t = 2.3206, df = 35.801, p-value = 0.02612
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05066074 0.75429252
## sample estimates:
## mean of x mean of y 
## 0.9256690 0.5231924
Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IL1B", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Split by sample :

Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "IL1B", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Violin plot of IL1B in macrophages :

il6_hs = subsobj@assays$RNA@data["IL6", subsobj$sample_type == "HS"]
il6_hd = subsobj@assays$RNA@data["IL6", subsobj$sample_type == "HD"]
il6_hs_VS_il6_hd = stats::t.test(il6_hs, il6_hd)
il6_hs_VS_il6_hd
## 
##  Welch Two Sample t-test
## 
## data:  il6_hs and il6_hd
## t = 2.3591, df = 377, p-value = 0.01883
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.001453521 0.016004661
## sample estimates:
##   mean of x   mean of y 
## 0.008729091 0.000000000
Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IL6", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Violin plot of TNF in macrophages :

tnf_hs = subsobj@assays$RNA@data["TNF", subsobj$sample_type == "HS"]
tnf_hd = subsobj@assays$RNA@data["TNF", subsobj$sample_type == "HD"]
tnf_hs_VS_tnf_hd = stats::t.test(tnf_hs, tnf_hd)
tnf_hs_VS_tnf_hd
## 
##  Welch Two Sample t-test
## 
## data:  tnf_hs and tnf_hd
## t = 3.8395, df = 45.546, p-value = 0.0003786
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1140962 0.3657054
## sample estimates:
##  mean of x  mean of y 
## 0.31784960 0.07794879
Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "TNF", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Split by sample :

Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "TNF", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Violin plot of GZMA in CD4 T cells :

subsobj = subset(sobj_ic, seurat_clusters %in% c(0,10))
table(subsobj$sample_type)
## 
##  HS  HD 
## 569  90
gzma_hs = subsobj@assays$RNA@data["GZMA", subsobj$sample_type == "HS"]
gzma_hd = subsobj@assays$RNA@data["GZMA", subsobj$sample_type == "HD"]
gzma_hs_VS_gzma_hd = stats::t.test(gzma_hs, gzma_hd)
gzma_hs_VS_gzma_hd
## 
##  Welch Two Sample t-test
## 
## data:  gzma_hs and gzma_hd
## t = 14.755, df = 172.51, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.225578 1.604097
## sample estimates:
## mean of x mean of y 
## 1.8456108 0.4307735
Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "GZMA", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Violin plot of IFNG in CD4 T cells :

ifng_hs = subsobj@assays$RNA@data["IFNG", subsobj$sample_type == "HS"]
ifng_hd = subsobj@assays$RNA@data["IFNG", subsobj$sample_type == "HD"]
ifng_hs_VS_ifng_hd = stats::t.test(ifng_hs, ifng_hd)
ifng_hs_VS_ifng_hd
## 
##  Welch Two Sample t-test
## 
## data:  ifng_hs and ifng_hd
## t = 8.1978, df = 651.25, p-value = 1.303e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1874856 0.3055919
## sample estimates:
##  mean of x  mean of y 
## 0.25885178 0.01231299
Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IFNG", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Violin plot of IL17A in CD4 T cells :

IL17_hs = subsobj@assays$RNA@data["IL17A", subsobj$sample_type == "HS"]
IL17_hd = subsobj@assays$RNA@data["IL17A", subsobj$sample_type == "HD"]
IL17_hs_VS_IL17_hd = stats::t.test(IL17_hs, IL17_hd)
IL17_hs_VS_IL17_hd
## 
##  Welch Two Sample t-test
## 
## data:  IL17_hs and IL17_hd
## t = 4.5667, df = 219.64, p-value = 8.255e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1412906 0.3558327
## sample estimates:
##  mean of x  mean of y 
## 0.30323619 0.05467451
Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IL17RE", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

We represent some genes split by sample type :

plot_list = lapply(c("IL1B", "GZMA", "IFNG", "IL17A", "TNF", "IL6"), FUN = function(one_gene) {
  p = aquarius::plot_split_dimred(sobj_ic,
                                  reduction = name2D,
                                  split_by = "sample_type",
                                  color_by = one_gene,
                                  color_palette = c("gray70", "#FDBB84", "#EF6548", "#7F0000", "black"),
                                  main_pt_size = 0.6,
                                  bg_pt_size = 0.6,
                                  order = TRUE,
                                  bg_color = "gray95")
  p = patchwork::wrap_plots(p, nrow = 1) +
    patchwork::plot_layout(guides = "collect") +
    ggplot2::theme(legend.position = "right") &
    ggplot2::theme(plot.subtitle = element_blank())
  return(p)
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

Barplot by cluster type :

quantif = table(sobj_ic$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

aquarius::plot_barplot(df = table(sobj_ic$sample_identifier,
                                  sobj_ic$cluster_type) %>%
                         as.data.frame.table() %>%
                         `colnames<-`(c("Sample", "Cell Type", "Number")),
                       x = "Sample", y = "Number", fill = "Cell Type",
                       position = position_stack()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 50 + .data$nb_cells, label = .data$nb_cells),
                      label.size = 0, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers),
                             breaks = names(color_markers),
                             name = "Cell Type") +
  ggplot2::scale_y_continuous(limits = c(0, 100 + max(quantif$nb_cells)),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "none")

Heatmap for macrophages :

subsobj = subset(sobj_ic, cluster_type == "macrophages")
features_oi = c("IL1B", "TNF",
                "HLA-DQA2", "HLA-DPA1", "HLA-DRB5",
                "HLA-A", "HLA-C", "B2M",
                "C1QA", "C1QB", "C1QC")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]  11 463
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")

Heatmap for CD4 T cells :

subsobj = subset(sobj_ic, cluster_type == "CD4 T cells")
features_oi = c("GZMA", "KLRB1", "BTG1", "ZFP36", "NFKBIA", "TXNIP", "CXCR4", "IFNG", "IL17A")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]   9 848
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "left",
                     annotation_legend_side = "left")

HFSC

Settings

We load the HFSCs dataset :

sobj_hfsc = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_sobj.rds"))
sobj_hfsc
## An object of class Seurat 
## 15384 features across 1454 samples within 1 assay 
## Active assay: RNA (15384 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_24_tsne, RNA_pca_24_umap, harmony, harmony_24_umap, harmony_24_tsne

This is the projection name to visualize cells :

name2D = "harmony_24_tsne"

To represent results from differential expression, we load the analyses results :

list_results = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_list_results.rds"))

lapply(list_results, FUN = names)
## $cluster_0_8
## [1] "p_val"     "avg_logFC" "pct.1"     "pct.2"     "p_val_adj"
## 
## $cluster_2
## [1] "p_val"     "avg_logFC" "pct.1"     "pct.2"     "p_val_adj"
## 
## $cluster_1
## [1] "p_val"     "avg_logFC" "pct.1"     "pct.2"     "p_val_adj"
## 
## $cluster_3
## [1] "p_val"     "avg_logFC" "pct.1"     "pct.2"     "p_val_adj"

Figures

HFSCs on the full atlas :

sobj$is_hfsc = (colnames(sobj) %in% colnames(sobj_hfsc))

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_hfsc", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[["HFSC"]], bg_color),
                              breaks = c(TRUE, FALSE)) +
  ggplot2::labs(title = "HFSCs",
                subtitle = paste0(ncol(sobj_hfsc), " cells")) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_text(hjust = 0.5, face = "bold"),
                 plot.subtitle = element_text(hjust = 0.5)) +
  Seurat::NoAxes() + Seurat::NoLegend()

KRT15 expression :

Seurat::FeaturePlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                    features = "KRT15") +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() + Seurat::NoLegend()

Genes of interest :

genes = c("TGFB2", "ANGPTL7", "FGF18", "MGP", "EPCAM", "KRT75", "NOTCH3", "PTHLH")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  
  sobj_hfsc$my_gene = Seurat::FetchData(sobj_hfsc, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj_hfsc, features = "my_gene", reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

Project name :

# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_hfsc), replace = FALSE, size = ncol(sobj_hfsc))

# Extract coordinates
cells_coord = sobj_hfsc@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj_hfsc$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 1.2) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

Cluster :

Seurat::DimPlot(sobj_hfsc, reduction = name2D, pt.size = 1,
                group.by = "seurat_clusters", cols = grey_palette,
                label = TRUE, label.size = 7) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Heatmap with proportions :

cluster_markers = c("TGFB2", "ANGPTL7", "EPCAM", "KRT75", "NOTCH3", "PTHLH")

## Bottom annotation : gene expression by cluster
ht_annot = Seurat::FetchData(sobj_hfsc, slot = "data", vars = cluster_markers) %>%
  as.data.frame()
ht_annot$clusters = sobj_hfsc$seurat_clusters
ht_annot = ht_annot %>%
  dplyr::group_by(clusters) %>%
  dplyr::summarise_all(funs('mean' = mean)) %>%
  as.data.frame() %>%
  dplyr::select(-clusters) %>%
  `colnames<-`(c(cluster_markers))

ha_bottom = ComplexHeatmap::HeatmapAnnotation(df = ht_annot,
                                              which = "column",
                                              show_legend = TRUE,
                                              col = setNames(nm = cluster_markers,
                                                             lapply(cluster_markers, FUN = color_fun)),
                                              annotation_name_side = "left")

## Right annotation : number of cells by dataset
ht_annot = table(sobj_hfsc$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  `rownames<-`(.$sample_identifier) %>%
  dplyr::select(-sample_identifier)

ha_right = ComplexHeatmap::HeatmapAnnotation(
  df = ht_annot,
  which = "row",
  show_legend = TRUE,
  annotation_name_side = "bottom",
  col = list(nb_cells  = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
                                              breaks = seq(from = range(ht_annot$nb_cells)[1],
                                                           to = range(ht_annot$nb_cells)[2],
                                                           length.out = 9))))

## Heatmap
ht = aquarius::plot_prop_heatmap(df = sobj_hfsc@meta.data[, c("sample_identifier", "seurat_clusters")],
                                 bottom_annotation = ha_bottom,
                                 # right_annotation = ha_right,
                                 cluster_rows = TRUE,
                                 column_names_centered = TRUE,
                                 prop_margin = 1,
                                 row_names_gp = grid::gpar(names = sample_info$sample_identifier,
                                                           col = sample_info$color,
                                                           fontface = "bold"),
                                 row_title = "Sample",
                                 column_title = "Cluster")

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom")

Heatmap for cluster 0 and 8 :

subsobj = subset(sobj_hfsc, seurat_clusters %in% c(0,8))

features_oi = rownames(list_results$cluster_0_8)
features_oi = features_oi[!grepl(features_oi, pattern = "^RP")]

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = cbind(mat_expr, subsobj$percent.mt)
colnames(mat_expr)[ncol(mat_expr)] = "percent.rb"
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]  48 631
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# list_colors[["seurat_clusters"]] = setNames(aquarius::gg_color_hue(length(levels(subsobj$seurat_clusters))),
#                                             nm = levels(subsobj$seurat_clusters))
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           # clusters = subsobj$seurat_clusters,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]
                                      # clusters = list_colors[["seurat_clusters"]]
                           ))


# g1 : REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM
# g2 : GOBP_APOPTOTIC_PROCESS
g1_genes = c("B2M", "HLA-C", "HLA-A", "MIF", "PPIA", "JUNB", "IFITM3")
g2_genes = c("Jun", "ATF3", "BTG2", "RHOB", "NFKBIA", "SGK1", "KLF9",
             "CAV1", "DDIT4", "PDK4", "TXNIP", "RNF1152", "TLE1")
ha_right = data.frame(genes =  c(features_oi, "percent.rb"), rownames = c(features_oi, "percent.rb"))
ha_right$group = case_when(ha_right$genes %in% g1_genes ~ "REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM",
                           ha_right$genes %in% g2_genes ~ "GOBP_APOPTOTIC_PROCESS",
                           TRUE ~ "others")

list_colors[["group"]] = setNames(
  nm = c("REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM", "GOBP_APOPTOTIC_PROCESS", "others"),
  c("red", "black", "gray90"))

ha_right = HeatmapAnnotation(group = ha_right$group,
                             col = list(group = list_colors[["group"]]),
                             which = "row",
                             show_annotation_name = FALSE,
                             show_legend = TRUE)

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             right_annotation = ha_right,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 10, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")

Violin plot of IFITM3 :

table(subsobj$sample_type)
## 
##  HS  HD 
## 588  43
ifitm3_hs = subsobj@assays$RNA@data["IFITM3", subsobj$sample_type == "HS"]
ifitm3_hd = subsobj@assays$RNA@data["IFITM3", subsobj$sample_type == "HD"]
ifitm3_hs_VS_ifitm3_hd = stats::t.test(ifitm3_hs, ifitm3_hd)
ifitm3_hs_VS_ifitm3_hd
## 
##  Welch Two Sample t-test
## 
## data:  ifitm3_hs and ifitm3_hd
## t = 7.0204, df = 47.675, p-value = 7.081e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.5509680 0.9933324
## sample estimates:
## mean of x mean of y 
##  1.775647  1.003497
Seurat::VlnPlot(subsobj, group.by = "sample_type",
                features = "IFITM3", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Split by sample :

Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "IFITM3", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Violin plot of DDIT4 :

table(subsobj$sample_type)
## 
##  HS  HD 
## 588  43
DDIT4_hs = subsobj@assays$RNA@data["DDIT4", subsobj$sample_type == "HS"]
DDIT4_hd = subsobj@assays$RNA@data["DDIT4", subsobj$sample_type == "HD"]
DDIT4_hs_VS_DDIT4_hd = stats::t.test(DDIT4_hs, DDIT4_hd)
DDIT4_hs_VS_DDIT4_hd
## 
##  Welch Two Sample t-test
## 
## data:  DDIT4_hs and DDIT4_hd
## t = -11.338, df = 46.368, p-value = 5.774e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.614197 -1.826081
## sample estimates:
## mean of x mean of y 
## 0.9376455 3.1577846
Seurat::VlnPlot(subsobj, group.by = "sample_type",
                features = "DDIT4", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Split by sample :

Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "DDIT4", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

We represent some genes split by sample type :

plot_list = lapply(c("DDIT4", "IFITM3"), FUN = function(one_gene) {
  p = aquarius::plot_split_dimred(sobj_hfsc,
                                  reduction = name2D,
                                  split_by = "sample_type",
                                  color_by = one_gene,
                                  color_palette = c("gray70", "#FDBB84", "#EF6548", "#7F0000", "black"),
                                  main_pt_size = 0.6,
                                  bg_pt_size = 0.6,
                                  order = TRUE,
                                  bg_color = "gray95")
  p = patchwork::wrap_plots(p, nrow = 1) +
    patchwork::plot_layout(guides = "collect") +
    ggplot2::theme(legend.position = "right") &
    ggplot2::theme(plot.subtitle = element_blank())
  return(p)
})

patchwork::wrap_plots(plot_list, ncol = 2)

Barplot with number of HFSCs and total number of cells :

quantif = dplyr::left_join(
  x = table(sobj$sample_identifier) %>%
    as.data.frame.table() %>%
    `colnames<-`(c("Sample", "nb_cells")),
  y = table(sobj_hfsc$sample_identifier) %>%
    as.data.frame.table() %>%
    `colnames<-`(c("Sample", "nb_hfsc")),
  by = "Sample") %>%
  dplyr::mutate(prop_hfsc = round(100*nb_hfsc / nb_cells, 2))

quantif_to_plot = rbind.data.frame(
  data.frame(Sample = quantif$Sample,
             nb_cells = quantif$nb_cells - quantif$nb_hfsc,
             cell_type = "others",
             stringsAsFactors = FALSE),
  data.frame(Sample = quantif$Sample,
             nb_cells = quantif$nb_hfsc,
             cell_type = "hfsc",
             stringsAsFactors = FALSE)) %>%
  dplyr::mutate(cell_type = factor(cell_type, levels = c("others", "hfsc")))

aquarius::plot_barplot(df = quantif_to_plot,
                       x = "Sample", y = "nb_cells", fill = "cell_type",
                       position = position_fill()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 0.05+.data$prop_hfsc/100, label = .data$nb_hfsc),
                      label.size = 0, size = 5, fill = NA) +
  ggplot2::scale_fill_manual(values = c("gray90", color_markers[["HFSC"]]),
                             breaks = c("others", "hfsc"),
                             name = "Cell Type") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "none")

IBL and ORS

Settings

We load the IBL + ORS dataset :

sobj_iblors = readRDS(paste0(data_dir, "/4_zoom/3_zoom_iblmors/iblmors_sobj.rds"))
sobj_iblors
## An object of class Seurat 
## 16701 features across 3532 samples within 1 assay 
## Active assay: RNA (16701 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne

This is the projection name to visualize cells :

name2D = "harmony_20_tsne"

To represent results from differential expression, we load the analyses results :

list_results = readRDS(paste0(data_dir, "/4_zoom/3_zoom_iblmors/iblmors_list_results.rds"))

lapply(list_results, FUN = names)
## $IBL_vs_ORS
## [1] "mark"        "enrichr_ibl" "enrichr_ors" "gsea"       
## 
## $cluster5_vs_ORS
## [1] "mark"       "enrichr_up" "enrichr_dn" "gsea"      
## 
## $IBL_HS_vs_HD
## [1] "mark"       "enrichr_hs" "enrichr_hd" "gsea"      
## 
## $ORS_HS_vs_HD
## [1] "mark"       "enrichr_hs" "enrichr_hd" "gsea"

Preparation

We defined cluster type :

cluster_type = table(sobj_iblors$cell_type, sobj_iblors$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj_iblors$cell_type)[cluster_type])

sobj_iblors$cluster_type = cluster_type[sobj_iblors$seurat_clusters]
sobj_iblors$cluster_type = factor(sobj_iblors$cluster_type,
                                  levels = c("IBL", "ORS"))

Figures

IBL + ORS on the full atlas :

sobj$cell_bc = colnames(sobj)
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj$is_iblors = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
                                  sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
                                  by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_iblors", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS")], bg_color),
                              breaks = c("IBL", "ORS", NA), na.value = bg_color) +
  ggplot2::labs(title = "IBL + ORS",
                subtitle = paste0(ncol(sobj_iblors), " cells")) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_text(hjust = 0.5, face = "bold"),
                 plot.subtitle = element_text(hjust = 0.5)) +
  Seurat::NoAxes() + Seurat::NoLegend()

Project name :

# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_iblors), replace = FALSE, size = ncol(sobj_iblors))

# Extract coordinates
cells_coord = sobj_iblors@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj_iblors$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 1.2) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

Cluster :

Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
                group.by = "seurat_clusters", cols = grey_palette,
                label = TRUE, label.size = 8) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Cluster type :

Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Cluster type split by sample type :

plot_list = aquarius::plot_split_dimred(sobj_iblors, reduction = name2D,
                                        group_by = "cluster_type",
                                        group_color = color_markers,
                                        split_by = "sample_type",
                                        bg_pt_size = 1, main_pt_size = 1,
                                        bg_color = bg_color)

patchwork::wrap_plots(plot_list) &
  Seurat::NoLegend()

Number of cells by cluster and by sample :

table(sobj_iblors$seurat_clusters,
      sobj_iblors$sample_identifier)
##     
##      HS_1 HS_2 HS_3 HS_4 HS_5 HD_1 HD_2
##   0    49   15   18  354   82   68   83
##   1    14    0   28  205   49   68   74
##   2    67   17  191   19   38   41   29
##   3    40   18    2  204   24   39   57
##   4    14    2   76   18  215    1   36
##   5    84   83    1   69   28   20   17
##   6    40    9  108   23   76   26   20
##   7    27    7   51   90   21   10   23
##   8    28    7   77   11   20   28   13
##   9    11    4   41   11   73   10    8
##   10   16    3   37    6   26   10    4
table(sobj_iblors$cluster_type,
      sobj_iblors$sample_identifier)
##      
##       HS_1 HS_2 HS_3 HS_4 HS_5 HD_1 HD_2
##   IBL  176   42  530   88  448  116  110
##   ORS  214  123  100  922  204  205  254

Separate cluster 5 :

sobj_iblors$cluster_type_sep5 = ifelse(sobj_iblors$seurat_clusters == 5,
                                       yes = "ORS_5",
                                       no = as.character(sobj_iblors$cluster_type)) %>%
  as.factor()

table(sobj_iblors$cluster_type_sep5,
      sobj_iblors$sample_type)
##        
##           HS   HD
##   IBL   1284  226
##   ORS   1298  422
##   ORS_5  265   37

Barplot by cluster family :

quantif = table(sobj_iblors$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

quantif_to_plot = table(sobj_iblors$sample_identifier,
                        sobj_iblors$cluster_type_sep5) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "CellType", "Number")) %>%
  dplyr::mutate(Style = ifelse(CellType == "ORS_5", yes = "IL1R2+", no = "IL1R2-")) %>%
  dplyr::mutate(Style = factor(Style, levels = c("IL1R2-", "IL1R2+"))) %>%
  dplyr::mutate(CellType = ifelse(CellType == "ORS_5", yes = "ORS", no = as.character(CellType))) %>%
  `colnames<-`(c("Sample", "Cell Type", "Number", "IL1R2 status"))

aquarius::plot_barplot(df = quantif_to_plot,
                       x = "Sample", y = "Number",
                       fill = "Cell Type", pattern = "IL1R2 status",
                       position = position_fill()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
                      label.size = 0, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers[levels(sobj_iblors$cluster_type)]),
                             breaks = names(color_markers[levels(sobj_iblors$cluster_type)]),
                             name = "Cell Type") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "right")

Focus on cluster 5, among ORS :

subsobj = subset(sobj_iblors, cluster_type == "ORS")
subsobj$cluster_type_sep5 = ifelse(subsobj$seurat_clusters == 5,
                                   yes = "ORS_5",
                                   no = as.character(subsobj$cluster_type)) %>%
  as.factor()

quantif = table(subsobj$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

quantif_to_plot = table(subsobj$sample_identifier,
                        subsobj$cluster_type_sep5) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "CellType", "Number")) %>%
  dplyr::mutate(Style = ifelse(CellType == "ORS_5", yes = "IL1R2+", no = "IL1R2-")) %>%
  dplyr::mutate(Style = factor(Style, levels = c("IL1R2-", "IL1R2+"))) %>%
  dplyr::mutate(CellType = ifelse(CellType == "ORS_5", yes = "ORS", no = as.character(CellType))) %>%
  `colnames<-`(c("Sample", "Cell Type", "Number", "IL1R2 status")) %>%
  dplyr::left_join(., y = quantif, by = "Sample") %>%
  dplyr::mutate(prop_cluster5 = round(Number/nb_cells, 4))

aquarius::plot_barplot(df = quantif_to_plot,
                       x = "Sample", y = "Number",
                       fill = "IL1R2 status",
                       position = position_fill()) +
  # ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
  #                     aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
  #                     label.size = 0, size = 5) +
  ggplot2::geom_label(data = quantif_to_plot %>%
                        dplyr::filter(`IL1R2 status` == "IL1R2+"),
                      inherit.aes = FALSE,
                      aes(x = .data$Sample, y = prop_cluster5 + 0.05, label = .data$Number),
                      label.size = 0, size = 5, fill = NA, col = "white") +
  ggplot2::scale_fill_manual(values = c("black", color_markers[["ORS"]]),
                             breaks = c("IL1R2+", "IL1R2-"),
                             name = "IL1R2 status") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "right")

DE genes between IBL and ORS :

mark = list_results$IBL_vs_ORS$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
  # up-regulated in IBL
  mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
  # up-regulated in ORS
  mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
  # representative and selective for IBL
  mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
  # representative and selective for ORS
  mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
  dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]

avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
                           nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))


ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
  ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
                            aes(x = pct.1, y = pct.2, label = gene_name),
                            col = "black", fill = NA, size = 3.5, label.size = NA) +
  ggplot2::labs(x = "Enriched in IBL",
                y = "Enriched in ORS") +
  ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
                                 values = scales::rescale(unname(avg_logFC_range))) +
  ggplot2::theme_classic() +
  ggplot2::theme(aspect.ratio = 1)

GSEA plot :

the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

Score for both gene sets, in all cells :

gene_sets = aquarius::get_gene_sets(species = "Homo sapiens")
the_gs_name = "REACTOME_KERATINIZATION" 
the_gs_content = gene_sets$gene_sets_full %>%
  dplyr::filter(gs_name == the_gs_name) %>%
  dplyr::pull(gene_symbol) %>%
  unlist()

sobj_iblors$score_kera = Seurat::AddModuleScore(sobj_iblors,
                                                features = list(the_gs_content))$Cluster1

Seurat::VlnPlot(sobj_iblors, features = "score_kera", pt.size = 0.05,
                split.by = "sample_type", group.by = "cluster_type",
                cols = rev(sample_type_colors)) +
  ggplot2::labs(title = the_gs_name) +
  ggplot2::theme(axis.title.x = element_blank())

the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_gs_content = gene_sets$gene_sets_full %>%
  dplyr::filter(gs_name == the_gs_name) %>%
  dplyr::pull(gene_symbol) %>%
  unlist()

sobj_iblors$score_ifna = Seurat::AddModuleScore(sobj_iblors,
                                                features = list(the_gs_content))$Cluster1

Seurat::VlnPlot(sobj_iblors, features = "score_ifna", pt.size = 0.05,
                split.by = "sample_type", group.by = "cluster_type",
                cols = rev(sample_type_colors)) +
  ggplot2::labs(title = the_gs_name) +
  ggplot2::theme(axis.title.x = element_blank())

Violin plot for IBL :

subsobj = subset(sobj_iblors, cluster_type == "IBL")

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.05,
                features = c("DUSP1", "DDIT4", "MIF", "LGALS7", "ARF5", "S100A9"),
                cols = sample_type_colors, ncol = 6) &
  ggplot2::theme(axis.title.x = element_blank(),
                 axis.title.y = element_blank(),
                 legend.position = "none")

Violin plot for ORS :

subsobj = subset(sobj_iblors, cluster_type == "ORS")

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.05,
                features = c("DUSP1", "KLF6", "CLDN1", "CTGF",
                             "S100A9", "CCL2", "IFITM3", "IFI27"),
                cols = sample_type_colors, ncol = 8) &
  ggplot2::theme(axis.title.x = element_blank(),
                 axis.title.y = element_blank(),
                 legend.position = "none")

Split by sample :

Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "IFI27", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")

Heatmap for cluster 5 vs other ORS :

subsobj = subset(sobj_iblors, cluster_type == "ORS")

features_oi = c("YBX3", "TXNIP", "KRT14", "KRT15", "NEAT1",
                "FXYD3", "MT2A", "MT1E", "MT1X", "AQP3", "GLUL",
                # "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
                "FOS", "JUNB", "DUSP1", "ZFP36", "NFKBIZ",
                "ATF3", "RHOB",  "ETS2", "IL18", "KLF4", "KLF6", "KLF9",
                "KLF3", "KLF5", "COL17A1", "THSD4", "WNT3", "WNT4", "SLPI", "PLAT",
                "LAMB4", "DCN", "SPINK5",
                "GSTM3", "ALDH3A1",  "LGALS7B", "SLC38A2", "EHF",  "CLEC2B",
                "IL20RB", "IL1R2", "IFI27", "CXCL14", "HLA-C", "GPSM2", "DAAM1",   "ID1",
                "RNASET2", "HOPX", "POU3F1", "SPRY1", "AR", "PDGFC",
                "WFDC2", "WFDC5", "TSC22D3", "FGFR3",  "LY6D", "IGFBP3", 
                # Other ORS
                "APOE", "CTSB", "CALD1", "SOX4",
                "STMN1", "LMO4", "CEBPB", "TMEM45A", "GPX2", "C1QTNF12", "GJB6",
                "KRT6A", "KRT17", "RBP1", "CALML3", "PTN", "DAPK2",
                "EGLN3", "FILIP1L", "ADGRL3", "FST", "EFNB2", "SEMA5A",
                "FGFR1", "EGR2", "CLDN1", "DEFB1", "CARD18", "MGST1")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]   89 2022
## Colors
list_colors = list()
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
list_colors[["population"]] = setNames(nm = c("IL1R2+ ORS", "other ORS"),
                                       c("black", color_markers["ORS"]))
list_colors[["nFeature_RNA"]] = circlize::colorRamp2(breaks = seq(from = min(subsobj$nFeature_RNA),
                                                                  to = max(subsobj$nFeature_RNA),
                                                                  length.out = 9),
                                                     colors = RColorBrewer::brewer.pal(name = "Greys", n = 9))

# Cells order
column_order = subsobj@meta.data %>%
  dplyr::mutate(seurat_clusters = factor(seurat_clusters, levels = c(5, 3, 0, 1, 7))) %>%
  dplyr::arrange(sample_type, seurat_clusters, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Annotation
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           population = ifelse(subsobj$cluster_type_sep5 == "ORS",
                                               yes = "other ORS", no = "IL1R2+ ORS"),
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]],
                                      population = list_colors[["population"]]))

ha_bottom = HeatmapAnnotation(nFeature_RNA = subsobj$nFeature_RNA,
                              col = list(nFeature_RNA = list_colors[["nFeature_RNA"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             bottom_annotation = ha_bottom,
             # Cell grouping
             column_split = subsobj$sample_type %>% as.character(),
             cluster_columns = FALSE,
             column_order = column_order,
             column_title = NULL,
             show_column_dend = FALSE,
             show_column_names = FALSE,
             # Genes
             cluster_rows = FALSE,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             # Style
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "right",
                     annotation_legend_side = "right")

Genes of interest :

genes = c("KRT16", "COL17A1", "DST", "KRT6B", "IL1R2", "WNT3",
          "IFI27", "CXCL14", "IGFBP3", "KRT15", "CD200", "IL18")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  
  sobj_iblors$my_gene = Seurat::FetchData(sobj_iblors, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj_iblors, features = "my_gene", reduction = name2D, pt.size = 0.25) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

HFSCs to IBL and ORS

Settings

We load the merged dataset :

sobj_traj = readRDS(paste0(data_dir, "/4_zoom/4_zoom_hfsc_iblmors/hfsc_iblmors_sobj_traj_tinga.rds"))
sobj_traj
## An object of class Seurat 
## 17050 features across 4986 samples within 1 assay 
## Active assay: RNA (17050 features, 2000 variable features)
##  8 dimensional reductions calculated: RNA_pca, RNA_pca_18_tsne, RNA_pca_18_umap, harmony, harmony_18_umap, harmony_18_tsne, harmony_dm, harmony_dm_5_umap

This is the projection name to visualize cells :

name2D = "harmony_dm"

We load the trajectory object for visualisation purpose :

my_traj = readRDS(paste0(data_dir, "/4_zoom/4_zoom_hfsc_iblmors/hfsc_iblmors_my_traj_tinga.rds"))
class(my_traj)
## [1] "dynwrap::with_dimred"     "dynwrap::with_trajectory"
## [3] "dynwrap::data_wrapper"    "list"

Preparation

We defined cell type based on individual object :

sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj_traj$cell_bc = colnames(sobj_traj)
sobj_traj$cluster_type = dplyr::left_join(sobj_traj@meta.data[, c("cell_bc", "percent.mt")],
                                          sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
                                          by = "cell_bc")[, "cluster_type"] %>% as.character()
sobj_traj$cluster_type = ifelse(colnames(sobj_traj) %in% colnames(sobj_hfsc),
                                yes = "HFSC",
                                no = sobj_traj$cluster_type) %>%
  as.factor()

Figures

Cells on the full atlas :

sobj$cell_bc = colnames(sobj)
sobj_traj$cell_bc = colnames(sobj_traj)
sobj$is_traj = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
                                sobj_traj@meta.data[, c("cell_bc", "cluster_type")],
                                by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_traj", order = levels(sobj_traj$cluster_type)) +
  ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS", "HFSC")], bg_color),
                              breaks = c("IBL", "ORS", "HFSC", NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() + Seurat::NoLegend()

Project name :

# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_traj), replace = FALSE, size = ncol(sobj_traj))

# Extract coordinates
cells_coord = sobj_traj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_type = sobj_traj$sample_type
cells_coord = cells_coord[order(sobj_traj$sample_type), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_type)) +
  ggplot2::geom_point(size = 1.2) +
  ggplot2::scale_color_manual(values = sample_type_colors,
                              breaks = names(sample_type_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

Cluster type :

Seurat::DimPlot(sobj_traj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Pseudotime :

Seurat::FeaturePlot(sobj_traj, reduction = name2D, pt.size = 0.5,
                    features = "pseudotime") +
  ggplot2::scale_color_gradientn(colors = viridis::viridis(n = 100)) +
  ggplot2::lims(x = range(sobj_traj@reductions[[name2D]]@cell.embeddings[, 1]),
                y = range(sobj_traj@reductions[[name2D]]@cell.embeddings[, 2])) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes()

Pseudotime with dynplot’s function :

dynplot::plot_dimred(trajectory = my_traj,
                     dimred = sobj_traj[[name2D]]@cell.embeddings,
                     # Cells
                     color_cells = 'pseudotime',
                     size_cells = 1.6,
                     border_radius_percentage = 0,
                     # Trajectory
                     plot_trajectory = TRUE,
                     color_trajectory = "none",
                     label_milestones = FALSE,
                     size_milestones = 0,
                     size_transitions = 1)

OEP002321 dataset

Settings

We load the dataset containing all cells :

sobj = readRDS(paste0(data_dir, "/5_wu/3_combined/wu_sobj.rds"))
sobj
## An object of class Seurat 
## 17727 features across 17762 samples within 1 assay 
## Active assay: RNA (17727 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne

This is the projection name to visualize cells :

name2D = "harmony_38_tsne"
name2D_atlas = name2D

These are all the samples analyzed :

sample_info = readRDS(paste0(data_dir, "/5_wu/1_metadata/wu_sample_info.rds"))

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))

We define cluster type and cluster family :

sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type)) %>%
  base::droplevels()
sobj$cluster_family = custom_order_cell_type[sobj$cluster_type, "cell_family"]
sobj$cluster_family = factor(sobj$cluster_family,
                             levels = names(family_color))

Global figures

Project name :

# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D_atlas]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

Cell type annotation :

Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Cluster type annotation :

Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

Gene expression to assess annotation :

genes = c("PTPRC", "MSX2", "KRT14")
names(genes) = c("immune cells", "matrix cells", "non-matrix cells")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  pop = names(genes)[gene_id]
  
  sobj$my_gene = Seurat::FetchData(sobj, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj, features = "my_gene", reduction = name2D_atlas) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) + 
    # subtitle = pop) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

Dotplot :

custom_order_cell_type = custom_order_cell_type[levels(sobj$cluster_type), c("cell_type", "cell_family")]

plot_list = Seurat::DotPlot(sobj,
                            features = c("PTPRC",
                                         "CD3E", "CD4",
                                         "CD207", "AIF1",
                                         # "PRDM1", "KRT85",
                                         "MSX2",
                                         "KRT32", "KRT35",
                                         "KRT31", "PRR9",
                                         "BAMBI", "ALDH1A3",
                                         "KRT71", "KRT73",
                                         "TOP2A", "MCM5",
                                         "KRT14", "CXCL14",
                                         "KRT15", "COL17A1",
                                         "DIO2", "TCEAL2",
                                         "KRT16", "KRT6C",
                                         "SPINK5", "LY6D"),
                            group.by = "cluster_type", scale = TRUE,
                            scale.by = "radius", scale.min = NA, scale.max = NA) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "bottom",
                 legend.direction = "vertical",
                 # legend.justification = "bottom",
                 legend.box = "horizontal",
                 legend.box.margin = margin(0,25,0,0),
                 axis.title = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.text.y = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 11, color = "black"),
                 plot.margin = unit(c(0,0.5,0,0), "cm"))

p = ggplot2::ggplot(custom_order_cell_type, aes(y = cell_type, x = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(y = 0.5, yend = 2.5, x = 0, xend = 0), size = 6, col = family_color["immune cells"]) +
  ggplot2::geom_segment(aes(y = 2.5, yend = 7.5, x = 0, xend = 0), size = 6, col = family_color["matrix"]) +
  ggplot2::geom_segment(aes(y = 7.5, yend = 11.5, x = 0, xend = 0), size = 6, col = family_color["non matrix"]) +
  ggplot2::scale_x_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.title = element_blank(),
                 axis.line.x = element_blank(),
                 axis.text.y = element_text(size = 12, color = "black"),
                 plot.margin = unit(c(0.5,0,0,0.5), "cm"))

plot_list = patchwork::wrap_plots(p, plot_list,
                                  nrow = 1, widths = c(1, 25))
plot_list

IBL and ORS dataset

We load the IBL + ORS dataset :

sobj_iblors = readRDS(paste0(data_dir, "/5_wu/4_ibl_ors/iblmors_sobj.rds"))
sobj_iblors
## An object of class Seurat 
## 15541 features across 8026 samples within 1 assay 
## Active assay: RNA (15541 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne

This is the projection name to visualize cells :

name2D = "harmony_20_tsne"

To represent results from differential expression, we load the analyses results :

list_results = readRDS(paste0(data_dir, "/5_wu/4_ibl_ors/iblmors_list_results.rds"))

lapply(list_results, FUN = names)
## $IBL_vs_ORS
## [1] "mark"        "enrichr_ibl" "enrichr_ors" "gsea"

We defined cluster type :

cluster_type = table(sobj_iblors$cell_type, sobj_iblors$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj_iblors$cell_type)[cluster_type])

sobj_iblors$cluster_type = cluster_type[sobj_iblors$seurat_clusters]
sobj_iblors$cluster_type = factor(sobj_iblors$cluster_type,
                                  levels = c("IBL", "ORS"))

IBL + ORS figure

IBL + ORS on the full atlas :

sobj$cell_bc = colnames(sobj)
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj$is_iblors = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
                                  sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
                                  by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_iblors", order = FALSE) +
  ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS")], bg_color),
                              breaks = c("IBL", "ORS", NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()

Cluster type :

Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

DE genes between IBL and ORS :

mark = list_results$IBL_vs_ORS$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
  # up-regulated in IBL
  mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
  # up-regulated in ORS
  mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
  # representative and selective for IBL
  mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
  # representative and selective for ORS
  mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
  dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]

avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
                           nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))


ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
  ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
                            aes(x = pct.1, y = pct.2, label = gene_name),
                            col = "black", fill = NA, size = 3.5, label.size = NA) +
  ggplot2::labs(x = "Enriched in IBL",
                y = "Enriched in ORS") +
  ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
                                 values = scales::rescale(unname(avg_logFC_range))) +
  ggplot2::theme_classic() +
  ggplot2::theme(aspect.ratio = 1)

GSEA plot :

the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

Genes of interest :

genes = c("KRT16", "COL17A1", "DST", "KRT6B", "IL1R2", "WNT3",
          "IFI27", "CXCL14", "IGFBP3", "KRT15", "CD200")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  
  sobj_iblors$my_gene = Seurat::FetchData(sobj_iblors, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj_iblors, features = "my_gene", reduction = name2D, pt.size = 0.25) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

Supplementary tables

In this section, we save files to associate with the manuscript, as supplementary tables.

Table S2 (package version)

We load the table :

package_version = read.table(paste0(".", "/data/info_to_install_2023_04_17.txt"),
                             header = TRUE)
head(package_version)
##   order package_name  version
## 1     1      acepack    1.4.1
## 2     2    ADGofTest      0.3
## 3     3    backports    1.2.1
## 4     4    base64enc    0.1-3
## 5     5           BH 1.72.0-3
## 6     6          bit    4.0.4
##                                                                              url
## 1                    https://cran.r-project.org/src/contrib/acepack_1.4.1.tar.gz
## 2                    https://cran.r-project.org/src/contrib/ADGofTest_0.3.tar.gz
## 3 http://cran.r-project.org/src/contrib/Archive/backports/backports_1.2.1.tar.gz
## 4                  https://cran.r-project.org/src/contrib/base64enc_0.1-3.tar.gz
## 5            http://cran.r-project.org/src/contrib/Archive/BH/BH_1.72.0-3.tar.gz
## 6             http://cran.r-project.org/src/contrib/Archive/bit/bit_4.0.4.tar.gz
##           type
## 1    CRAN_last
## 2    CRAN_last
## 3 CRAN_archive
## 4    CRAN_last
## 5 CRAN_archive
## 6 CRAN_archive

Columns correspond to :

  • order : order to install package such that one never need to install dependencies
  • package_name : package name
  • version : installed version, on the local machine
  • url : the package was installed in the Singularity container using this url

We save this table, except the last column (just for me) :

openxlsx::write.xlsx(package_version[, c(1:4)],
                     file = paste0(".", "/data/Supplementary Table 2.xlsx"))

Table S3 (cell type annotation)

We load the table :

cell_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
##      CD4 T cells      CD8 T cells Langerhans cells      macrophages 
##               13               13                9               10 
##          B cells          cuticle           cortex          medulla 
##               16               15               16               10 
##              IRS    proliferative              IBL              ORS 
##               16               20               15               16 
##              IFE             HFSC      melanocytes        sebocytes 
##               17               17               10                8

We save this table, except the last column (just for me) :

openxlsx::write.xlsx(cell_markers,
                     file = paste0(".", "/data/Supplementary Table 3.xlsx"))

R Session

show
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
## 
## locale:
## [1] C
## 
## attached base packages:
##  [1] parallel  stats4    grid      stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] org.Mm.eg.db_3.10.0   AnnotationDbi_1.48.0  IRanges_2.20.2       
##  [4] S4Vectors_0.24.4      Biobase_2.46.0        BiocGenerics_0.32.0  
##  [7] ComplexHeatmap_2.14.0 ggplot2_3.3.5         patchwork_1.1.2      
## [10] dplyr_1.0.7          
## 
## loaded via a namespace (and not attached):
##   [1] softImpute_1.4              graphlayouts_0.7.0         
##   [3] pbapply_1.4-2               lattice_0.20-41            
##   [5] haven_2.3.1                 dyndimred_1.0.3            
##   [7] vctrs_0.3.8                 usethis_2.0.1              
##   [9] dynwrap_1.2.1               blob_1.2.1                 
##  [11] survival_3.2-13             prodlim_2019.11.13         
##  [13] dynutils_1.0.5              later_1.3.0                
##  [15] DBI_1.1.1                   R.utils_2.11.0             
##  [17] SingleCellExperiment_1.8.0  rappdirs_0.3.3             
##  [19] uwot_0.1.8                  dqrng_0.2.1                
##  [21] jpeg_0.1-8.1                zlibbioc_1.32.0            
##  [23] pspline_1.0-18              pcaMethods_1.78.0          
##  [25] mvtnorm_1.1-1               htmlwidgets_1.5.4          
##  [27] GlobalOptions_0.1.2         future_1.22.1              
##  [29] UpSetR_1.4.0                laeken_0.5.2               
##  [31] leiden_0.3.3                clustree_0.4.3             
##  [33] lmds_0.1.0                  scater_1.14.6              
##  [35] irlba_2.3.3                 markdown_1.1               
##  [37] DEoptimR_1.0-9              tidygraph_1.1.2            
##  [39] Rcpp_1.0.9                  readr_2.0.2                
##  [41] KernSmooth_2.23-17          carrier_0.1.0              
##  [43] promises_1.1.0              gdata_2.18.0               
##  [45] DelayedArray_0.12.3         limma_3.42.2               
##  [47] graph_1.64.0                RcppParallel_5.1.4         
##  [49] Hmisc_4.4-0                 fs_1.5.2                   
##  [51] RSpectra_0.16-0             fastmatch_1.1-0            
##  [53] ranger_0.12.1               digest_0.6.25              
##  [55] png_0.1-7                   sctransform_0.2.1          
##  [57] cowplot_1.0.0               DOSE_3.12.0                
##  [59] here_1.0.1                  TInGa_0.0.0.9000           
##  [61] dynplot_1.1.0               ggraph_2.0.3               
##  [63] pkgconfig_2.0.3             GO.db_3.10.0               
##  [65] DelayedMatrixStats_1.8.0    gower_0.2.1                
##  [67] ggbeeswarm_0.6.0            iterators_1.0.12           
##  [69] DropletUtils_1.6.1          reticulate_1.26            
##  [71] clusterProfiler_3.14.3      SummarizedExperiment_1.16.1
##  [73] circlize_0.4.15             beeswarm_0.4.0             
##  [75] GetoptLong_1.0.5            xfun_0.35                  
##  [77] bslib_0.3.1                 zoo_1.8-10                 
##  [79] tidyselect_1.1.0            GA_3.2                     
##  [81] reshape2_1.4.4              purrr_0.3.4                
##  [83] ica_1.0-2                   pcaPP_1.9-73               
##  [85] viridisLite_0.3.0           rtracklayer_1.46.0         
##  [87] rlang_1.0.2                 hexbin_1.28.1              
##  [89] jquerylib_0.1.4             dyneval_0.9.9              
##  [91] glue_1.4.2                  waldo_0.3.1                
##  [93] RColorBrewer_1.1-2          matrixStats_0.56.0         
##  [95] stringr_1.4.0               lava_1.6.7                 
##  [97] europepmc_0.3               DESeq2_1.26.0              
##  [99] recipes_0.1.17              labeling_0.3               
## [101] httpuv_1.5.2                class_7.3-17               
## [103] BiocNeighbors_1.4.2         DO.db_2.9                  
## [105] annotate_1.64.0             jsonlite_1.7.2             
## [107] XVector_0.26.0              bit_4.0.4                  
## [109] mime_0.9                    aquarius_0.1.5             
## [111] Rsamtools_2.2.3             gridExtra_2.3              
## [113] gplots_3.0.3                stringi_1.4.6              
## [115] processx_3.5.2              gsl_2.1-6                  
## [117] bitops_1.0-6                cli_3.0.1                  
## [119] batchelor_1.2.4             RSQLite_2.2.0              
## [121] randomForest_4.6-14         tidyr_1.1.4                
## [123] data.table_1.14.2           rstudioapi_0.13            
## [125] units_0.7-2                 GenomicAlignments_1.22.1   
## [127] nlme_3.1-147                qvalue_2.18.0              
## [129] scran_1.14.6                locfit_1.5-9.4             
## [131] scDblFinder_1.1.8           listenv_0.8.0              
## [133] ggthemes_4.2.4              gridGraphics_0.5-0         
## [135] R.oo_1.24.0                 dbplyr_1.4.4               
## [137] TTR_0.24.2                  readxl_1.3.1               
## [139] lifecycle_1.0.1             timeDate_3043.102          
## [141] ggpattern_0.3.1             munsell_0.5.0              
## [143] cellranger_1.1.0            R.methodsS3_1.8.1          
## [145] proxyC_0.1.5                visNetwork_2.0.9           
## [147] caTools_1.18.0              codetools_0.2-16           
## [149] GenomeInfoDb_1.22.1         vipor_0.4.5                
## [151] lmtest_0.9-38               msigdbr_7.5.1              
## [153] htmlTable_1.13.3            triebeard_0.3.0            
## [155] lsei_1.2-0                  xtable_1.8-4               
## [157] ROCR_1.0-7                  classInt_0.4-3             
## [159] BiocManager_1.30.10         scatterplot3d_0.3-41       
## [161] abind_1.4-5                 farver_2.0.3               
## [163] parallelly_1.28.1           RANN_2.6.1                 
## [165] askpass_1.1                 GenomicRanges_1.38.0       
## [167] RcppAnnoy_0.0.16            tibble_3.1.5               
## [169] ggdendro_0.1-20             cluster_2.1.0              
## [171] future.apply_1.5.0          Seurat_3.1.5               
## [173] dendextend_1.15.1           Matrix_1.3-2               
## [175] ellipsis_0.3.2              prettyunits_1.1.1          
## [177] lubridate_1.7.9             ggridges_0.5.2             
## [179] igraph_1.2.5                RcppEigen_0.3.3.7.0        
## [181] fgsea_1.12.0                remotes_2.4.2              
## [183] scBFA_1.0.0                 destiny_3.0.1              
## [185] VIM_6.1.1                   testthat_3.1.0             
## [187] htmltools_0.5.2             BiocFileCache_1.10.2       
## [189] yaml_2.2.1                  utf8_1.1.4                 
## [191] plotly_4.9.2.1              XML_3.99-0.3               
## [193] ModelMetrics_1.2.2.2        e1071_1.7-3                
## [195] foreign_0.8-76              withr_2.5.0                
## [197] fitdistrplus_1.0-14         BiocParallel_1.20.1        
## [199] xgboost_1.4.1.1             bit64_4.0.5                
## [201] foreach_1.5.0               robustbase_0.93-9          
## [203] Biostrings_2.54.0           GOSemSim_2.13.1            
## [205] rsvd_1.0.3                  memoise_2.0.0              
## [207] evaluate_0.18               forcats_0.5.0              
## [209] rio_0.5.16                  geneplotter_1.64.0         
## [211] tzdb_0.1.2                  caret_6.0-86               
## [213] ps_1.6.0                    DiagrammeR_1.0.6.1         
## [215] curl_4.3                    fdrtool_1.2.15             
## [217] fansi_0.4.1                 highr_0.8                  
## [219] urltools_1.7.3              xts_0.12.1                 
## [221] GSEABase_1.48.0             acepack_1.4.1              
## [223] edgeR_3.28.1                checkmate_2.0.0            
## [225] scds_1.2.0                  cachem_1.0.6               
## [227] npsurv_0.4-0                babelgene_22.3             
## [229] rjson_0.2.20                openxlsx_4.1.5             
## [231] ggrepel_0.9.1               clue_0.3-60                
## [233] rprojroot_2.0.2             stabledist_0.7-1           
## [235] tools_3.6.3                 sass_0.4.0                 
## [237] nichenetr_1.1.1             magrittr_2.0.1             
## [239] RCurl_1.98-1.2              proxy_0.4-24               
## [241] car_3.0-11                  ape_5.3                    
## [243] ggplotify_0.0.5             xml2_1.3.2                 
## [245] httr_1.4.2                  assertthat_0.2.1           
## [247] rmarkdown_2.18              boot_1.3-25                
## [249] globals_0.14.0              R6_2.4.1                   
## [251] Rhdf5lib_1.8.0              nnet_7.3-14                
## [253] RcppHNSW_0.2.0              progress_1.2.2             
## [255] genefilter_1.68.0           statmod_1.4.34             
## [257] gtools_3.8.2                shape_1.4.6                
## [259] sf_1.0-3                    HDF5Array_1.14.4           
## [261] BiocSingular_1.2.2          rhdf5_2.30.1               
## [263] splines_3.6.3               AUCell_1.8.0               
## [265] carData_3.0-4               colorspace_1.4-1           
## [267] generics_0.1.0              base64enc_0.1-3            
## [269] dynfeature_1.0.0            smoother_1.1               
## [271] gridtext_0.1.1              pillar_1.6.3               
## [273] tweenr_1.0.1                sp_1.4-1                   
## [275] ggplot.multistats_1.0.0     rvcheck_0.1.8              
## [277] GenomeInfoDbData_1.2.2      plyr_1.8.6                 
## [279] gtable_0.3.0                zip_2.2.0                  
## [281] knitr_1.41                  latticeExtra_0.6-29        
## [283] biomaRt_2.42.1              fastmap_1.1.0              
## [285] ADGofTest_0.3               copula_1.0-0               
## [287] doParallel_1.0.15           vcd_1.4-8                  
## [289] babelwhale_1.0.1            openssl_1.4.1              
## [291] scales_1.1.1                backports_1.2.1            
## [293] ipred_0.9-12                enrichplot_1.6.1           
## [295] hms_1.1.1                   ggforce_0.3.1              
## [297] Rtsne_0.15                  shiny_1.7.1                
## [299] gridpattern_0.3.1           numDeriv_2016.8-1.1        
## [301] polyclip_1.10-0             lazyeval_0.2.2             
## [303] Formula_1.2-3               tsne_0.1-3                 
## [305] crayon_1.3.4                MASS_7.3-54                
## [307] pROC_1.16.2                 viridis_0.5.1              
## [309] dynparam_1.0.0              rpart_4.1-15               
## [311] zinbwave_1.8.0              compiler_3.6.3             
## [313] ggtext_0.1.0
---
title: "HS project"
subtitle: "Figures"
author: "Audrey"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output:
  html_document:
    code_folding: show
    code_download: true
    toc: true
    toc_float: true
    number_sections: false
---

<style>
body {
text-align: justify}
</style>

<!-- Automatically computes and prints in the output the running time for any code chunk -->
```{r, echo=FALSE}
# https://github.com/rstudio/rmarkdown/issues/1453
hooks = knitr::knit_hooks$get()
hook_foldable = function(type) {
  force(type)
  function(x, options) {
    res = hooks[[type]](x, options)
    
    if (isFALSE(options[[paste0("fold_", type)]])) return(res)
    
    paste0(
      "<details><summary>", "show", "</summary>\n\n",
      res,
      "\n\n</details>"
    )
  }
}
knitr::knit_hooks$set(
  output = hook_foldable("output"),
  plot = hook_foldable("plot"),
  time_it = local({
    now = NULL
    function(before, options) {
      if (options$time_it) {
        if (before) {
          now <= Sys.time()
        } else {
          res = difftime(Sys.time(), now, units = "secs")
          paste("(Time to run :", round(res, digits = 2), "s)")
        }
      }
    }
  })
)
```

<!-- Set default parameters for all chunks -->
```{r, setup, include = FALSE}
set.seed(1337L)
knitr::opts_chunk$set(echo = TRUE, # display code
                      # display chunk output
                      message = FALSE,
                      warning = FALSE,
                      fold_output = FALSE, # useful for sessionInfo()
                      fold_plot = FALSE,
                      
                      # figure settings
                      fig.align = 'center',
                      fig.width = 20,
                      fig.height = 15,
                      
                      # something about seed, chunk and Rmarkdown compilation
                      # https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-error-in-rmd-but-not-in-r-script
                      # cache = TRUE,
                      cache.lazy = FALSE, 
                      
                      # add runtime after chunk
                      time_it = FALSE,
                      
                      # save figures in PDF in a separate folder
                      dev = c('png', 'pdf'), # tiff or pdf alone renders bad in html
                      # dpi = 300,
                      fig.path = "figures_detail/",
                      pdf.options(encoding = "ISOLatin9.enc"))
```


This file is used to prepare the figures for the paper.

```{r library}
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
library(org.Mm.eg.db)

.libPaths()
```


# Preparation

Here are the folders where analyzes are stored :

```{r locations}
data_dir = "./.."
list.files(data_dir)
```


We load the dataset containing all cells :

```{r load_all_sobj}
sobj = readRDS(paste0(data_dir, "/3_combined/hs_hd_sobj.rds"))
sobj
```

These are all the samples analyzed :

```{r sample_info, class.source = 'fold-hide', fig.width = 8, fig.height = 3}
sample_info = readRDS(paste0(data_dir, "/1_metadata/hs_hd_sample_info.rds"))

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
```

These are the custom colors for cell populations :

```{r color_markers, fig.width = 10, fig.height = 1, class.source = "fold-hide"}
color_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_color_markers.rds"))
color_markers = color_markers[names(color_markers) != "melanocytes"]
ors_color = color_markers["ORS"]
color_markers["ORS"] = color_markers["IFE"] 
color_markers["IFE"] = ors_color
color_markers["B cells"] = "chocolate3"
rm(ors_color)

# re-order
color_markers = color_markers[c("CD4 T cells", "CD8 T cells", "Langerhans cells", "macrophages", "B cells",
                                "cuticle", "cortex", "medulla", "IRS", "proliferative",
                                "HFSC", "ORS", "IBL", "IFE", "sebocytes")]

data.frame(cell_type = names(color_markers),
           color = unlist(color_markers)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank(),
                 axis.text.x = element_text(hjust = 1, angle = 20))
```

We define custom colors for sample type :

```{r sample_type_colors, fig.width = 3, fig.height = 0.75, class.source = "fold-hide"}
sample_type_colors = setNames(nm = levels(sample_info$sample_type),
                              c("#C55F40", "#2C78E6"))

data.frame(cell_type = names(sample_type_colors),
           color = unlist(sample_type_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(sample_type_colors), breaks = names(sample_type_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())
```

We set a background color :

```{r bg_color}
bg_color = "gray94"
```


This is the correspondence between cell types and cell families, and custom colors to color cells by cell family :

```{r cell_family}
custom_order_cell_type = data.frame(
  cell_type = names(color_markers),
  cell_family = c(rep("immune cells", 5),
                  rep("matrix", 5),
                  rep("non matrix", 5)),
  stringsAsFactors = FALSE)
custom_order_cell_type$cell_type = factor(custom_order_cell_type$cell_type,
                                          levels = custom_order_cell_type$cell_type)
rownames(custom_order_cell_type) = custom_order_cell_type$cell_type

family_color = c("immune cells" = "slateblue1",
                 "matrix" = "mediumseagreen",
                 "non matrix" = "firebrick3")
```

We load markers to display on a dotplot to assess cell type annotation :

```{r dotplot_markers}
dotplot_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = dotplot_markers[names(dotplot_markers) != "melanocytes"]
lengths(dotplot_markers)
```

Custom functions to display gene expression on the heatmap :

```{r color_fun, class.source = "fold-hide"}
color_fun = function(one_gene) {
  gene_range = range(ht_annot[, one_gene])
  gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
                                      breaks = seq(from = gene_range[1], to = gene_range[2],
                                                   length.out = length(aquarius::color_gene)))
  return(gene_palette)
}
```


# All samples

## Settings

This is the projection name to visualize cells :

```{r all_sobj_name2D}
name2D = "harmony_38_tsne"
name2D_atlas = name2D
```

## Preparation

We make a low resolutive clustering for the heatmap :

```{r cluster_all}
sobj = Seurat::FindClusters(sobj, resolution = 0.4)

length(levels(sobj$seurat_clusters))
```


We define cluster type and cluster family :

```{r cluster_type_family_all, fig.width = 7, fig.height = 5}
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))
sobj$cluster_family = custom_order_cell_type[sobj$cluster_type, "cell_family"]
sobj$cluster_family = factor(sobj$cluster_family,
                             levels = names(family_color))
```


## Figures

Project name :

```{r fig1_sample_identifier, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```


Sample type :

```{r fig1_sample_type, fig.width = 4, fig.height = 4}
# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_type = sobj$sample_type
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_type)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_type_colors,
                              breaks = names(sample_type_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

Cluster :

```{r fig1_cluster, fig.width = 4, fig.height = 4}
grey_palette = setNames(nm = levels(sobj$seurat_clusters),
                        rep("#D9D9D9", length(levels(sobj$seurat_clusters))))
grey_palette[c("7", "16", "1", "12", "11", "10", "15")] = "#BDBDBD"
grey_palette[c("16", "14", "5", "9")] = "#969696"

Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.4,
                group.by = "seurat_clusters", cols = grey_palette,
                label = TRUE, label.size = 6) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Cell type annotation :

```{r fig1_cell_type, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Cluster family annotation :

```{r fig1_cluster_family, fig.width = 6, fig.height = 6}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_family", cols = family_color) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Cell type annotation split by condition :

```{r fig1_cell_type_split, fig.width = 8, fig.height = 5}
plot_list = aquarius::plot_split_dimred(sobj, reduction = name2D,
                                        group_by = "cell_type",
                                        group_color = color_markers,
                                        split_by = "sample_type",
                                        split_color = sample_type_colors,
                                        bg_color = bg_color)

patchwork::wrap_plots(plot_list) &
  Seurat::NoLegend()
```

Gene expression to assess annotation :

```{r fig1_gene_family, fig.width = 4, fig.height = 4}
genes = c("PTPRC", "MSX2", "KRT14")
names(genes) = c("immune cells", "matrix cells", "non-matrix cells")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  pop = names(genes)[gene_id]
  
  sobj$my_gene = Seurat::FetchData(sobj, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj, features = "my_gene", reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) + 
    # subtitle = pop) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
```

Barplot by cluster family :

```{r fig1_barplot_family, fig.width = 3.5, fig.height = 3.5}
quantif = table(sobj$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

aquarius::plot_barplot(df = table(sobj$sample_identifier,
                                  sobj$cluster_family) %>%
                         as.data.frame.table() %>%
                         `colnames<-`(c("Sample", "Cell Type", "Number")),
                       x = "Sample", y = "Number", fill = "Cell Type",
                       position = position_fill()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
                      label.size = 0, size = 4) +
  ggplot2::scale_fill_manual(values = unlist(family_color),
                             breaks = names(family_color),
                             name = "Cell Family") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 axis.text.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0)),
                 legend.position = "none")
```

Heatmap of cluster proportion by sample :

```{r fig1_heatmap_prop, fig.width = 9, fig.height = 5, class.source = "fold-hide"}
group_by = "seurat_clusters"

cluster_by_sample = table(sobj$sample_identifier,
                          sobj@meta.data[, group_by]) %>%
  prop.table(margin = 1) %>%
  as.matrix()

## Right annotation : number of cells by dataset
ht_annot = table(sobj$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  `rownames<-`(.$sample_identifier) %>%
  dplyr::select(-sample_identifier)

ha_right = ComplexHeatmap::HeatmapAnnotation(
  df = ht_annot,
  which = "row",
  show_legend = TRUE,
  annotation_name_side = "top",
  col = list(nb_cells  = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
                                              breaks = seq(from = range(ht_annot$nb_cells)[1],
                                                           to = range(ht_annot$nb_cells)[2],
                                                           length.out = 9))))

## Left annotation : gender
ha_left = ComplexHeatmap::HeatmapAnnotation(
  gender = sample_info$gender,
  which = "row",
  show_legend = TRUE,
  annotation_name_side = "top",
  col = list(gender = setNames(nm = c("F", "M"),
                               c("lightcyan3", "navyblue"))))

## Top annotation : main cell type in this cluster
ht_annot = table(sobj$sample_identifier,
                 sobj@meta.data[, group_by]) %>%
  prop.table(margin = 1) %>%
  as.matrix()

ht_annot = table(sobj$cell_type,
                 sobj@meta.data[, group_by]) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
ht_annot = data.frame(row.names = names(ht_annot),
                      cell_type = names(color_markers)[ht_annot],
                      stringsAsFactors = FALSE)
ht_annot = dplyr::left_join(ht_annot, custom_order_cell_type, by = "cell_type") %>%
  # Simplification for matrix
  dplyr::mutate(cell_type = ifelse(cell_type %in% c("medulla", "cortex", "cuticle"), yes = "hair shaft", no = cell_type)) %>%
  # Simplification for T cells
  dplyr::mutate(cell_type = ifelse(cell_type %in% c("CD4 T cells", "CD8 T cells"), yes = "T cells", no = cell_type)) %>%
  # Simplification for APC
  dplyr::mutate(cell_type = ifelse(cell_type %in% c("Langerhans cells", "macrophages"), yes = "APC", no = cell_type)) %>%
  # Add color
  dplyr::mutate(color = as.character(color_markers[cell_type])) %>%
  dplyr::mutate(color = ifelse(cell_type == "hair shaft", yes = "#FFB6C1", no = color)) %>%
  dplyr::mutate(color = ifelse(cell_type == "T cells", yes = "#8A6EE6", no = color)) %>%
  dplyr::mutate(color = ifelse(cell_type == "APC", yes = "#9CAA4B", no = color))

ha_top = ComplexHeatmap::HeatmapAnnotation(
  # cell_type = ht_annot$cell_type,
  cell_family = ht_annot$cell_family,
  which = "column",
  show_legend = TRUE,
  show_annotation_name = FALSE,
  # annotation_name_side = "left",
  col = list(#cell_type = setNames(nm = ht_annot$cell_type,
    #                      ht_annot$color),
    cell_family = family_color
  ))

## Assemble heatmap
ht = ComplexHeatmap::Heatmap(cluster_by_sample,
                             heatmap_legend_param = list(title = "Proportion",
                                                         col = c("#2166AC", "#F7F7F7", "#B2182B")),
                             # bottom_annotation = ha_bottom,
                             right_annotation = ha_right,
                             left_annotation = ha_left,
                             top_annotation = ha_top,
                             cluster_rows = TRUE,
                             cluster_columns = TRUE,
                             row_title = "Sample",
                             row_names_gp = grid::gpar(names = sample_info$sample_identifier,
                                                       col = sample_info$color,
                                                       fontface = "bold"),
                             column_title = "Cluster",
                             column_names_centered = TRUE,
                             row_names_side = "left",
                             column_names_side = "top",
                             column_names_rot = 0)

## Draw !
ComplexHeatmap::draw(ht, merge_legends = TRUE)
```

For the dotplot, we clarify clusters and cell type annotation :

```{r improve_custom_order_cell_type}
cell_type_in_cluster = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 1) %>%
  apply(., 1, which.max)
cell_type_in_cluster = cell_type_in_cluster - 1

missing_cluster = setdiff(levels(sobj$seurat_clusters),
                          cell_type_in_cluster)

cell_type_in_cluster = data.frame(cell_type = c(names(cell_type_in_cluster), cluster_type[missing_cluster]),
                                  cluster_id = c(cell_type_in_cluster, names(cluster_type[missing_cluster])),
                                  stringsAsFactors = FALSE, row.names = NULL) %>%
  dplyr::mutate(cluster_id = as.numeric(cluster_id)) %>%
  dplyr::arrange(cell_type, cluster_id)

custom_order_cell_type$clusters = custom_order_cell_type %>%
  apply(., MARGIN = 1, FUN = function(one_row) {
    cell_type = one_row["cell_type"]
    clusters = cell_type_in_cluster %>%
      dplyr::filter(.data$cell_type == .env$cell_type) %>%
      dplyr::pull(cluster_id)
    
    cell_type_cluster = paste0(cell_type, " (", paste0(clusters, collapse = ", "), ")")
    
    return(cell_type_cluster)
  }) %>%
  factor(., levels = .)

custom_order_cell_type
```

Dotplot :

```{r fig1_dotplot, fig.width = 8, fig.height = 8.5, class.source = "fold-hide"}
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = c("PTPRC",
                                               "CD3E", "CD4",
                                               "CD3E", "CD8A",
                                               "CD207", "AIF1",
                                               "TREM2", "MSR1",
                                               "CD79A", "CD79B",
                                               # "PRDM1", "KRT85",
                                               "MSX2",
                                               "KRT32", "KRT35",
                                               "KRT31", "PRR9",
                                               "BAMBI", "ALDH1A3",
                                               "KRT71", "KRT73",
                                               "TOP2A", "MCM5",
                                               "KRT14", "CXCL14",
                                               "KRT15", "COL17A1",
                                               "DIO2", "TCEAL2",
                                               "KRT16", "KRT6C",
                                               "SPINK5", "LY6D",
                                               "CLMP", "PPARG"),
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = ggplot2::ggplot(custom_order_cell_type, aes(x = clusters, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = family_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = family_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = family_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list
```


# Immune cells

## Settings

We load the immune cells dataset :

```{r sobj_ic}
sobj_ic = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_sobj.rds"))
sobj_ic
```


This is the projection name to visualize cells :

```{r sobj_name2D_ic}
name2D = "harmony_20_tsne"
```

To represent results from differential expression, we load the analyses results :

```{r list_results_ic}
list_results = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_list_results.rds"))

lapply(list_results, FUN = names)
```


## Preparation

We defined cluster type and cluster family :

```{r cluster_type_family_ic, fig.width = 7, fig.height = 5}
cluster_type = table(sobj_ic$cell_type, sobj_ic$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj_ic$cell_type)[cluster_type])

sobj_ic$cluster_type = cluster_type[sobj_ic$seurat_clusters]
sobj_ic$cluster_type = factor(sobj_ic$cluster_type,
                              levels = levels(sobj_ic$cell_type))
```


## Figures

Control cells on the full atlas :

```{r fig_ic_location, fig.width = 2, fig.height = 2}
sobj$is_immune = (colnames(sobj) %in% colnames(sobj_ic))

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_immune", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(family_color[["immune cells"]], bg_color),
                              breaks = c(TRUE, FALSE)) +
  ggplot2::labs(title = "Immune cells",
                subtitle = paste0(ncol(sobj_ic), " cells")) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_text(hjust = 0.5, face = "bold"),
                 plot.subtitle = element_text(hjust = 0.5)) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

Cluster type :

```{r fig_ic_cluster_type, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj_ic, reduction = name2D, pt.size = 1,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Violin plot of IL1B in macrophages :

```{r fig_ic_mac_il1b, fig.width = 2, fig.height = 2.5}
subsobj = subset(sobj_ic, seurat_clusters == 2)
table(subsobj$sample_type)

il1b_hs = subsobj@assays$RNA@data["IL1B", subsobj$sample_type == "HS"]
il1b_hd = subsobj@assays$RNA@data["IL1B", subsobj$sample_type == "HD"]
il1b_hs_VS_il1b_hd = stats::t.test(il1b_hs, il1b_hd)
il1b_hs_VS_il1b_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IL1B", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Split by sample :

```{r fig_ic_mac_il1b_split, fig.width = 6, fig.height = 4}
Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "IL1B", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Violin plot of IL1B in macrophages :

```{r fig_ic_mac_il6, fig.width = 2, fig.height = 2.5}
il6_hs = subsobj@assays$RNA@data["IL6", subsobj$sample_type == "HS"]
il6_hd = subsobj@assays$RNA@data["IL6", subsobj$sample_type == "HD"]
il6_hs_VS_il6_hd = stats::t.test(il6_hs, il6_hd)
il6_hs_VS_il6_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IL6", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Violin plot of TNF in macrophages :

```{r fig_ic_mac_tnf, fig.width = 2, fig.height = 2.5}
tnf_hs = subsobj@assays$RNA@data["TNF", subsobj$sample_type == "HS"]
tnf_hd = subsobj@assays$RNA@data["TNF", subsobj$sample_type == "HD"]
tnf_hs_VS_tnf_hd = stats::t.test(tnf_hs, tnf_hd)
tnf_hs_VS_tnf_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "TNF", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Split by sample :

```{r fig_ic_mac_tnf_split, fig.width = 6, fig.height = 4}
Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "TNF", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```


Violin plot of GZMA in CD4 T cells :

```{r fig_ic_t4_gzma, fig.width = 2, fig.height = 2.5}
subsobj = subset(sobj_ic, seurat_clusters %in% c(0,10))
table(subsobj$sample_type)

gzma_hs = subsobj@assays$RNA@data["GZMA", subsobj$sample_type == "HS"]
gzma_hd = subsobj@assays$RNA@data["GZMA", subsobj$sample_type == "HD"]
gzma_hs_VS_gzma_hd = stats::t.test(gzma_hs, gzma_hd)
gzma_hs_VS_gzma_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "GZMA", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Violin plot of IFNG in CD4 T cells :

```{r fig_ic_t4_ifng, fig.width = 2, fig.height = 2.5}
ifng_hs = subsobj@assays$RNA@data["IFNG", subsobj$sample_type == "HS"]
ifng_hd = subsobj@assays$RNA@data["IFNG", subsobj$sample_type == "HD"]
ifng_hs_VS_ifng_hd = stats::t.test(ifng_hs, ifng_hd)
ifng_hs_VS_ifng_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IFNG", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```
Violin plot of IL17A in CD4 T cells :

```{r fig_ic_t4_IL17, fig.width = 2, fig.height = 2.5}
IL17_hs = subsobj@assays$RNA@data["IL17A", subsobj$sample_type == "HS"]
IL17_hd = subsobj@assays$RNA@data["IL17A", subsobj$sample_type == "HD"]
IL17_hs_VS_IL17_hd = stats::t.test(IL17_hs, IL17_hd)
IL17_hs_VS_IL17_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
                features = "IL17RE", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

We represent some genes split by sample type :

```{r fig_ic_gene_split, fig.width = 6, fig.height = 3}
plot_list = lapply(c("IL1B", "GZMA", "IFNG", "IL17A", "TNF", "IL6"), FUN = function(one_gene) {
  p = aquarius::plot_split_dimred(sobj_ic,
                                  reduction = name2D,
                                  split_by = "sample_type",
                                  color_by = one_gene,
                                  color_palette = c("gray70", "#FDBB84", "#EF6548", "#7F0000", "black"),
                                  main_pt_size = 0.6,
                                  bg_pt_size = 0.6,
                                  order = TRUE,
                                  bg_color = "gray95")
  p = patchwork::wrap_plots(p, nrow = 1) +
    patchwork::plot_layout(guides = "collect") +
    ggplot2::theme(legend.position = "right") &
    ggplot2::theme(plot.subtitle = element_blank())
  return(p)
})

plot_list
```

Barplot by cluster type :

```{r fig_ic_barplot, fig.width = 4, fig.height = 4}
quantif = table(sobj_ic$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

aquarius::plot_barplot(df = table(sobj_ic$sample_identifier,
                                  sobj_ic$cluster_type) %>%
                         as.data.frame.table() %>%
                         `colnames<-`(c("Sample", "Cell Type", "Number")),
                       x = "Sample", y = "Number", fill = "Cell Type",
                       position = position_stack()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 50 + .data$nb_cells, label = .data$nb_cells),
                      label.size = 0, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers),
                             breaks = names(color_markers),
                             name = "Cell Type") +
  ggplot2::scale_y_continuous(limits = c(0, 100 + max(quantif$nb_cells)),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "none")
```

Heatmap for macrophages :

```{r fig_ic_heatmap_macrophages, fig.width = 5, fig.height = 5.2, class.source = "fold-hide"}
subsobj = subset(sobj_ic, cluster_type == "macrophages")
features_oi = c("IL1B", "TNF",
                "HLA-DQA2", "HLA-DPA1", "HLA-DRB5",
                "HLA-A", "HLA-C", "B2M",
                "C1QA", "C1QB", "C1QC")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")
```

Heatmap for CD4 T cells :

```{r fig_ic_heatmap_cd4, fig.width = 6, fig.height = 4, class.source = "fold-hide"}
subsobj = subset(sobj_ic, cluster_type == "CD4 T cells")
features_oi = c("GZMA", "KLRB1", "BTG1", "ZFP36", "NFKBIA", "TXNIP", "CXCR4", "IFNG", "IL17A")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "left",
                     annotation_legend_side = "left")
```


# HFSC

## Settings

We load the HFSCs dataset :

```{r sobj_hfsc}
sobj_hfsc = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_sobj.rds"))
sobj_hfsc
```


This is the projection name to visualize cells :

```{r sobj_name2D_hfsc}
name2D = "harmony_24_tsne"
```

To represent results from differential expression, we load the analyses results :

```{r list_results_hfsc}
list_results = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_list_results.rds"))

lapply(list_results, FUN = names)
```

## Figures

HFSCs on the full atlas :

```{r fig_hfsc_location, fig.width = 2, fig.height = 2}
sobj$is_hfsc = (colnames(sobj) %in% colnames(sobj_hfsc))

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_hfsc", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[["HFSC"]], bg_color),
                              breaks = c(TRUE, FALSE)) +
  ggplot2::labs(title = "HFSCs",
                subtitle = paste0(ncol(sobj_hfsc), " cells")) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_text(hjust = 0.5, face = "bold"),
                 plot.subtitle = element_text(hjust = 0.5)) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

KRT15 expression :

```{r fig_hfsc_krt15, fig.width = 2, fig.height = 2}
Seurat::FeaturePlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                    features = "KRT15") +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

Genes of interest :

```{r fig_hfsc_genes, fig.width = 4, fig.height = 4}
genes = c("TGFB2", "ANGPTL7", "FGF18", "MGP", "EPCAM", "KRT75", "NOTCH3", "PTHLH")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  
  sobj_hfsc$my_gene = Seurat::FetchData(sobj_hfsc, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj_hfsc, features = "my_gene", reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
```

Project name :

```{r fig_hfsc_project_name, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_hfsc), replace = FALSE, size = ncol(sobj_hfsc))

# Extract coordinates
cells_coord = sobj_hfsc@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj_hfsc$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 1.2) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

Cluster :

```{r fig_hfsc_clusters, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj_hfsc, reduction = name2D, pt.size = 1,
                group.by = "seurat_clusters", cols = grey_palette,
                label = TRUE, label.size = 7) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Heatmap with proportions :

```{r fig_hfsc_heatmap, fig.width = 5.5, fig.height = 5.5, class.source = "fold-hide"}
cluster_markers = c("TGFB2", "ANGPTL7", "EPCAM", "KRT75", "NOTCH3", "PTHLH")

## Bottom annotation : gene expression by cluster
ht_annot = Seurat::FetchData(sobj_hfsc, slot = "data", vars = cluster_markers) %>%
  as.data.frame()
ht_annot$clusters = sobj_hfsc$seurat_clusters
ht_annot = ht_annot %>%
  dplyr::group_by(clusters) %>%
  dplyr::summarise_all(funs('mean' = mean)) %>%
  as.data.frame() %>%
  dplyr::select(-clusters) %>%
  `colnames<-`(c(cluster_markers))

ha_bottom = ComplexHeatmap::HeatmapAnnotation(df = ht_annot,
                                              which = "column",
                                              show_legend = TRUE,
                                              col = setNames(nm = cluster_markers,
                                                             lapply(cluster_markers, FUN = color_fun)),
                                              annotation_name_side = "left")

## Right annotation : number of cells by dataset
ht_annot = table(sobj_hfsc$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  `rownames<-`(.$sample_identifier) %>%
  dplyr::select(-sample_identifier)

ha_right = ComplexHeatmap::HeatmapAnnotation(
  df = ht_annot,
  which = "row",
  show_legend = TRUE,
  annotation_name_side = "bottom",
  col = list(nb_cells  = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
                                              breaks = seq(from = range(ht_annot$nb_cells)[1],
                                                           to = range(ht_annot$nb_cells)[2],
                                                           length.out = 9))))

## Heatmap
ht = aquarius::plot_prop_heatmap(df = sobj_hfsc@meta.data[, c("sample_identifier", "seurat_clusters")],
                                 bottom_annotation = ha_bottom,
                                 # right_annotation = ha_right,
                                 cluster_rows = TRUE,
                                 column_names_centered = TRUE,
                                 prop_margin = 1,
                                 row_names_gp = grid::gpar(names = sample_info$sample_identifier,
                                                           col = sample_info$color,
                                                           fontface = "bold"),
                                 row_title = "Sample",
                                 column_title = "Cluster")

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom")
```

Heatmap for cluster 0 and 8 :

```{r fig_hfsc_heatmap_cluster08, fig.width = 5.5, fig.height = 10, class.source = "fold-hide"}
subsobj = subset(sobj_hfsc, seurat_clusters %in% c(0,8))

features_oi = rownames(list_results$cluster_0_8)
features_oi = features_oi[!grepl(features_oi, pattern = "^RP")]

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = cbind(mat_expr, subsobj$percent.mt)
colnames(mat_expr)[ncol(mat_expr)] = "percent.rb"
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# list_colors[["seurat_clusters"]] = setNames(aquarius::gg_color_hue(length(levels(subsobj$seurat_clusters))),
#                                             nm = levels(subsobj$seurat_clusters))
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           # clusters = subsobj$seurat_clusters,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]
                                      # clusters = list_colors[["seurat_clusters"]]
                           ))


# g1 : REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM
# g2 : GOBP_APOPTOTIC_PROCESS
g1_genes = c("B2M", "HLA-C", "HLA-A", "MIF", "PPIA", "JUNB", "IFITM3")
g2_genes = c("Jun", "ATF3", "BTG2", "RHOB", "NFKBIA", "SGK1", "KLF9",
             "CAV1", "DDIT4", "PDK4", "TXNIP", "RNF1152", "TLE1")
ha_right = data.frame(genes =  c(features_oi, "percent.rb"), rownames = c(features_oi, "percent.rb"))
ha_right$group = case_when(ha_right$genes %in% g1_genes ~ "REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM",
                           ha_right$genes %in% g2_genes ~ "GOBP_APOPTOTIC_PROCESS",
                           TRUE ~ "others")

list_colors[["group"]] = setNames(
  nm = c("REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM", "GOBP_APOPTOTIC_PROCESS", "others"),
  c("red", "black", "gray90"))

ha_right = HeatmapAnnotation(group = ha_right$group,
                             col = list(group = list_colors[["group"]]),
                             which = "row",
                             show_annotation_name = FALSE,
                             show_legend = TRUE)

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             right_annotation = ha_right,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 10, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")
```

Violin plot of IFITM3 :

```{r fig_hfsc_ifitm3, fig.width = 3, fig.height = 4}
table(subsobj$sample_type)

ifitm3_hs = subsobj@assays$RNA@data["IFITM3", subsobj$sample_type == "HS"]
ifitm3_hd = subsobj@assays$RNA@data["IFITM3", subsobj$sample_type == "HD"]
ifitm3_hs_VS_ifitm3_hd = stats::t.test(ifitm3_hs, ifitm3_hd)
ifitm3_hs_VS_ifitm3_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type",
                features = "IFITM3", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Split by sample :

```{r fig_hfsc_ifitm3_split, fig.width = 6, fig.height = 4}
Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "IFITM3", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```


Violin plot of DDIT4 :

```{r fig_hfsc_ddit4, fig.width = 3, fig.height = 4}
table(subsobj$sample_type)

DDIT4_hs = subsobj@assays$RNA@data["DDIT4", subsobj$sample_type == "HS"]
DDIT4_hd = subsobj@assays$RNA@data["DDIT4", subsobj$sample_type == "HD"]
DDIT4_hs_VS_DDIT4_hd = stats::t.test(DDIT4_hs, DDIT4_hd)
DDIT4_hs_VS_DDIT4_hd

Seurat::VlnPlot(subsobj, group.by = "sample_type",
                features = "DDIT4", cols = sample_type_colors) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Split by sample :

```{r fig_hfsc_ddit4_split, fig.width = 6, fig.height = 4}
Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "DDIT4", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

We represent some genes split by sample type :

```{r fig_hfsc_gene_split, fig.width = 12, fig.height = 4}
plot_list = lapply(c("DDIT4", "IFITM3"), FUN = function(one_gene) {
  p = aquarius::plot_split_dimred(sobj_hfsc,
                                  reduction = name2D,
                                  split_by = "sample_type",
                                  color_by = one_gene,
                                  color_palette = c("gray70", "#FDBB84", "#EF6548", "#7F0000", "black"),
                                  main_pt_size = 0.6,
                                  bg_pt_size = 0.6,
                                  order = TRUE,
                                  bg_color = "gray95")
  p = patchwork::wrap_plots(p, nrow = 1) +
    patchwork::plot_layout(guides = "collect") +
    ggplot2::theme(legend.position = "right") &
    ggplot2::theme(plot.subtitle = element_blank())
  return(p)
})

patchwork::wrap_plots(plot_list, ncol = 2)
```


Barplot with number of HFSCs and total number of cells :

```{r fig_hfsc_barplot, fig.width = 5.5, fig.height = 4.5}
quantif = dplyr::left_join(
  x = table(sobj$sample_identifier) %>%
    as.data.frame.table() %>%
    `colnames<-`(c("Sample", "nb_cells")),
  y = table(sobj_hfsc$sample_identifier) %>%
    as.data.frame.table() %>%
    `colnames<-`(c("Sample", "nb_hfsc")),
  by = "Sample") %>%
  dplyr::mutate(prop_hfsc = round(100*nb_hfsc / nb_cells, 2))

quantif_to_plot = rbind.data.frame(
  data.frame(Sample = quantif$Sample,
             nb_cells = quantif$nb_cells - quantif$nb_hfsc,
             cell_type = "others",
             stringsAsFactors = FALSE),
  data.frame(Sample = quantif$Sample,
             nb_cells = quantif$nb_hfsc,
             cell_type = "hfsc",
             stringsAsFactors = FALSE)) %>%
  dplyr::mutate(cell_type = factor(cell_type, levels = c("others", "hfsc")))

aquarius::plot_barplot(df = quantif_to_plot,
                       x = "Sample", y = "nb_cells", fill = "cell_type",
                       position = position_fill()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 0.05+.data$prop_hfsc/100, label = .data$nb_hfsc),
                      label.size = 0, size = 5, fill = NA) +
  ggplot2::scale_fill_manual(values = c("gray90", color_markers[["HFSC"]]),
                             breaks = c("others", "hfsc"),
                             name = "Cell Type") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "none")
```

# IBL and ORS

## Settings

We load the IBL + ORS dataset :

```{r sobj_iblors}
sobj_iblors = readRDS(paste0(data_dir, "/4_zoom/3_zoom_iblmors/iblmors_sobj.rds"))
sobj_iblors
```


This is the projection name to visualize cells :

```{r sobj_name2D_iblors}
name2D = "harmony_20_tsne"
```

To represent results from differential expression, we load the analyses results :

```{r list_results_iblors}
list_results = readRDS(paste0(data_dir, "/4_zoom/3_zoom_iblmors/iblmors_list_results.rds"))

lapply(list_results, FUN = names)
```

## Preparation

We defined cluster type :

```{r cluster_type_iblors, fig.width = 7, fig.height = 5}
cluster_type = table(sobj_iblors$cell_type, sobj_iblors$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj_iblors$cell_type)[cluster_type])

sobj_iblors$cluster_type = cluster_type[sobj_iblors$seurat_clusters]
sobj_iblors$cluster_type = factor(sobj_iblors$cluster_type,
                                  levels = c("IBL", "ORS"))
```

## Figures

IBL + ORS on the full atlas :

```{r fig_iblors_location, fig.width = 2, fig.height = 2}
sobj$cell_bc = colnames(sobj)
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj$is_iblors = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
                                  sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
                                  by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_iblors", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS")], bg_color),
                              breaks = c("IBL", "ORS", NA), na.value = bg_color) +
  ggplot2::labs(title = "IBL + ORS",
                subtitle = paste0(ncol(sobj_iblors), " cells")) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_text(hjust = 0.5, face = "bold"),
                 plot.subtitle = element_text(hjust = 0.5)) +
  Seurat::NoAxes() + Seurat::NoLegend()
```


Project name :

```{r fig_iblors_project_name, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_iblors), replace = FALSE, size = ncol(sobj_iblors))

# Extract coordinates
cells_coord = sobj_iblors@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj_iblors$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 1.2) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

Cluster :

```{r fig_iblors_clusters, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
                group.by = "seurat_clusters", cols = grey_palette,
                label = TRUE, label.size = 8) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Cluster type :

```{r fig_iblors_cluster_type, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Cluster type split by sample type :

```{r fig_iblors_cluster_type_split, fig.width = 8, fig.height = 4}
plot_list = aquarius::plot_split_dimred(sobj_iblors, reduction = name2D,
                                        group_by = "cluster_type",
                                        group_color = color_markers,
                                        split_by = "sample_type",
                                        bg_pt_size = 1, main_pt_size = 1,
                                        bg_color = bg_color)

patchwork::wrap_plots(plot_list) &
  Seurat::NoLegend()
```

Number of cells by cluster and by sample :

```{r ibl_ors_table_clusters}
table(sobj_iblors$seurat_clusters,
      sobj_iblors$sample_identifier)

table(sobj_iblors$cluster_type,
      sobj_iblors$sample_identifier)
```

Separate cluster 5 :

```{r ibl_ors_table_clusters_sep5}
sobj_iblors$cluster_type_sep5 = ifelse(sobj_iblors$seurat_clusters == 5,
                                       yes = "ORS_5",
                                       no = as.character(sobj_iblors$cluster_type)) %>%
  as.factor()

table(sobj_iblors$cluster_type_sep5,
      sobj_iblors$sample_type)
```


Barplot by cluster family :

```{r fig_iblors_barplot, fig.width = 5.5, fig.height = 4.5}
quantif = table(sobj_iblors$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

quantif_to_plot = table(sobj_iblors$sample_identifier,
                        sobj_iblors$cluster_type_sep5) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "CellType", "Number")) %>%
  dplyr::mutate(Style = ifelse(CellType == "ORS_5", yes = "IL1R2+", no = "IL1R2-")) %>%
  dplyr::mutate(Style = factor(Style, levels = c("IL1R2-", "IL1R2+"))) %>%
  dplyr::mutate(CellType = ifelse(CellType == "ORS_5", yes = "ORS", no = as.character(CellType))) %>%
  `colnames<-`(c("Sample", "Cell Type", "Number", "IL1R2 status"))

aquarius::plot_barplot(df = quantif_to_plot,
                       x = "Sample", y = "Number",
                       fill = "Cell Type", pattern = "IL1R2 status",
                       position = position_fill()) +
  ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
                      aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
                      label.size = 0, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers[levels(sobj_iblors$cluster_type)]),
                             breaks = names(color_markers[levels(sobj_iblors$cluster_type)]),
                             name = "Cell Type") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "right")
```

Focus on cluster 5, among ORS :

```{r fig_iblors_barplot_cluster5, fig.width = 5.5, fig.height = 4.5}
subsobj = subset(sobj_iblors, cluster_type == "ORS")
subsobj$cluster_type_sep5 = ifelse(subsobj$seurat_clusters == 5,
                                   yes = "ORS_5",
                                   no = as.character(subsobj$cluster_type)) %>%
  as.factor()

quantif = table(subsobj$sample_identifier) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "nb_cells"))

quantif_to_plot = table(subsobj$sample_identifier,
                        subsobj$cluster_type_sep5) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("Sample", "CellType", "Number")) %>%
  dplyr::mutate(Style = ifelse(CellType == "ORS_5", yes = "IL1R2+", no = "IL1R2-")) %>%
  dplyr::mutate(Style = factor(Style, levels = c("IL1R2-", "IL1R2+"))) %>%
  dplyr::mutate(CellType = ifelse(CellType == "ORS_5", yes = "ORS", no = as.character(CellType))) %>%
  `colnames<-`(c("Sample", "Cell Type", "Number", "IL1R2 status")) %>%
  dplyr::left_join(., y = quantif, by = "Sample") %>%
  dplyr::mutate(prop_cluster5 = round(Number/nb_cells, 4))

aquarius::plot_barplot(df = quantif_to_plot,
                       x = "Sample", y = "Number",
                       fill = "IL1R2 status",
                       position = position_fill()) +
  # ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
  #                     aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
  #                     label.size = 0, size = 5) +
  ggplot2::geom_label(data = quantif_to_plot %>%
                        dplyr::filter(`IL1R2 status` == "IL1R2+"),
                      inherit.aes = FALSE,
                      aes(x = .data$Sample, y = prop_cluster5 + 0.05, label = .data$Number),
                      label.size = 0, size = 5, fill = NA, col = "white") +
  ggplot2::scale_fill_manual(values = c("black", color_markers[["ORS"]]),
                             breaks = c("IL1R2+", "IL1R2-"),
                             name = "IL1R2 status") +
  ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
                              labels = paste0(seq(0, 100, by = 25), sep = " %"),
                              expand = ggplot2::expansion(add = c(0, 0.05))) +
  ggplot2::theme(axis.title.y = element_blank(),
                 axis.line.x = element_line(colour = "lightgray"),
                 text = element_text(size = 15),
                 legend.position = "right")
```


DE genes between IBL and ORS :

```{r fig_iblors_de_pop, fig.width = 6, fig.height = 6}
mark = list_results$IBL_vs_ORS$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
  # up-regulated in IBL
  mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
  # up-regulated in ORS
  mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
  # representative and selective for IBL
  mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
  # representative and selective for ORS
  mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
  dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]

avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
                           nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))


ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
  ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
                            aes(x = pct.1, y = pct.2, label = gene_name),
                            col = "black", fill = NA, size = 3.5, label.size = NA) +
  ggplot2::labs(x = "Enriched in IBL",
                y = "Enriched in ORS") +
  ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
                                 values = scales::rescale(unname(avg_logFC_range))) +
  ggplot2::theme_classic() +
  ggplot2::theme(aspect.ratio = 1)
```

GSEA plot :

```{r fig_iblors_keratinization, fig.width = 6, fig.height = 4}
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

```{r fig_iblors_ifna, fig.width = 6, fig.height = 4}
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

Score for both gene sets, in all cells :

```{r fig_iblors_kera_score, fig.width = 6, fig.height = 4}
gene_sets = aquarius::get_gene_sets(species = "Homo sapiens")
the_gs_name = "REACTOME_KERATINIZATION" 
the_gs_content = gene_sets$gene_sets_full %>%
  dplyr::filter(gs_name == the_gs_name) %>%
  dplyr::pull(gene_symbol) %>%
  unlist()

sobj_iblors$score_kera = Seurat::AddModuleScore(sobj_iblors,
                                                features = list(the_gs_content))$Cluster1

Seurat::VlnPlot(sobj_iblors, features = "score_kera", pt.size = 0.05,
                split.by = "sample_type", group.by = "cluster_type",
                cols = rev(sample_type_colors)) +
  ggplot2::labs(title = the_gs_name) +
  ggplot2::theme(axis.title.x = element_blank())
```

```{r fig_iblors_ifna_score, fig.width = 6, fig.height = 4}
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_gs_content = gene_sets$gene_sets_full %>%
  dplyr::filter(gs_name == the_gs_name) %>%
  dplyr::pull(gene_symbol) %>%
  unlist()

sobj_iblors$score_ifna = Seurat::AddModuleScore(sobj_iblors,
                                                features = list(the_gs_content))$Cluster1

Seurat::VlnPlot(sobj_iblors, features = "score_ifna", pt.size = 0.05,
                split.by = "sample_type", group.by = "cluster_type",
                cols = rev(sample_type_colors)) +
  ggplot2::labs(title = the_gs_name) +
  ggplot2::theme(axis.title.x = element_blank())
```


Violin plot for IBL :

```{r fig_iblors_ibl, fig.width = 12, fig.height = 2.5}
subsobj = subset(sobj_iblors, cluster_type == "IBL")

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.05,
                features = c("DUSP1", "DDIT4", "MIF", "LGALS7", "ARF5", "S100A9"),
                cols = sample_type_colors, ncol = 6) &
  ggplot2::theme(axis.title.x = element_blank(),
                 axis.title.y = element_blank(),
                 legend.position = "none")
```

Violin plot for ORS :

```{r fig_iblors_ors, fig.width = 12, fig.height = 2.5}
subsobj = subset(sobj_iblors, cluster_type == "ORS")

Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.05,
                features = c("DUSP1", "KLF6", "CLDN1", "CTGF",
                             "S100A9", "CCL2", "IFITM3", "IFI27"),
                cols = sample_type_colors, ncol = 8) &
  ggplot2::theme(axis.title.x = element_blank(),
                 axis.title.y = element_blank(),
                 legend.position = "none")
```

Split by sample :

```{r fig_iblors_ors_split, fig.width = 6, fig.height = 4}
Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
                features = "IFI27", cols = sample_info$color) +
  ggplot2::theme(axis.title.x = element_blank(),
                 legend.position = "none")
```

Heatmap for cluster 5 vs other ORS :

```{r fig_iblors_heatmap_cluster5, fig.width = 12, fig.height = 19, class.source = "fold-hide"}
subsobj = subset(sobj_iblors, cluster_type == "ORS")

features_oi = c("YBX3", "TXNIP", "KRT14", "KRT15", "NEAT1",
                "FXYD3", "MT2A", "MT1E", "MT1X", "AQP3", "GLUL",
                # "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
                "FOS", "JUNB", "DUSP1", "ZFP36", "NFKBIZ",
                "ATF3", "RHOB",  "ETS2", "IL18", "KLF4", "KLF6", "KLF9",
                "KLF3", "KLF5", "COL17A1", "THSD4", "WNT3", "WNT4", "SLPI", "PLAT",
                "LAMB4", "DCN", "SPINK5",
                "GSTM3", "ALDH3A1",  "LGALS7B", "SLC38A2", "EHF",  "CLEC2B",
                "IL20RB", "IL1R2", "IFI27", "CXCL14", "HLA-C", "GPSM2", "DAAM1",   "ID1",
                "RNASET2", "HOPX", "POU3F1", "SPRY1", "AR", "PDGFC",
                "WFDC2", "WFDC5", "TSC22D3", "FGFR3",  "LY6D", "IGFBP3", 
                # Other ORS
                "APOE", "CTSB", "CALD1", "SOX4",
                "STMN1", "LMO4", "CEBPB", "TMEM45A", "GPX2", "C1QTNF12", "GJB6",
                "KRT6A", "KRT17", "RBP1", "CALML3", "PTN", "DAPK2",
                "EGLN3", "FILIP1L", "ADGRL3", "FST", "EFNB2", "SEMA5A",
                "FGFR1", "EGR2", "CLDN1", "DEFB1", "CARD18", "MGST1")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells

## Colors
list_colors = list()
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
list_colors[["population"]] = setNames(nm = c("IL1R2+ ORS", "other ORS"),
                                       c("black", color_markers["ORS"]))
list_colors[["nFeature_RNA"]] = circlize::colorRamp2(breaks = seq(from = min(subsobj$nFeature_RNA),
                                                                  to = max(subsobj$nFeature_RNA),
                                                                  length.out = 9),
                                                     colors = RColorBrewer::brewer.pal(name = "Greys", n = 9))

# Cells order
column_order = subsobj@meta.data %>%
  dplyr::mutate(seurat_clusters = factor(seurat_clusters, levels = c(5, 3, 0, 1, 7))) %>%
  dplyr::arrange(sample_type, seurat_clusters, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Annotation
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           population = ifelse(subsobj$cluster_type_sep5 == "ORS",
                                               yes = "other ORS", no = "IL1R2+ ORS"),
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]],
                                      population = list_colors[["population"]]))

ha_bottom = HeatmapAnnotation(nFeature_RNA = subsobj$nFeature_RNA,
                              col = list(nFeature_RNA = list_colors[["nFeature_RNA"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             bottom_annotation = ha_bottom,
             # Cell grouping
             column_split = subsobj$sample_type %>% as.character(),
             cluster_columns = FALSE,
             column_order = column_order,
             column_title = NULL,
             show_column_dend = FALSE,
             show_column_names = FALSE,
             # Genes
             cluster_rows = FALSE,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             # Style
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "right",
                     annotation_legend_side = "right")
```

Genes of interest :

```{r fig_iblors_genes, fig.width = 2, fig.height = 2}
genes = c("KRT16", "COL17A1", "DST", "KRT6B", "IL1R2", "WNT3",
          "IFI27", "CXCL14", "IGFBP3", "KRT15", "CD200", "IL18")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  
  sobj_iblors$my_gene = Seurat::FetchData(sobj_iblors, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj_iblors, features = "my_gene", reduction = name2D, pt.size = 0.25) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
```


# HFSCs to IBL and ORS

## Settings

We load the merged dataset :

```{r sobj_traj}
sobj_traj = readRDS(paste0(data_dir, "/4_zoom/4_zoom_hfsc_iblmors/hfsc_iblmors_sobj_traj_tinga.rds"))
sobj_traj
```


This is the projection name to visualize cells :

```{r sobj_name2D_traj}
name2D = "harmony_dm"
```

We load the trajectory object for visualisation purpose :

```{r load_traj}
my_traj = readRDS(paste0(data_dir, "/4_zoom/4_zoom_hfsc_iblmors/hfsc_iblmors_my_traj_tinga.rds"))
class(my_traj)
```


## Preparation

We defined cell type based on individual object :

```{r cluster_type_traj, fig.width = 7, fig.height = 5}
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj_traj$cell_bc = colnames(sobj_traj)
sobj_traj$cluster_type = dplyr::left_join(sobj_traj@meta.data[, c("cell_bc", "percent.mt")],
                                          sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
                                          by = "cell_bc")[, "cluster_type"] %>% as.character()
sobj_traj$cluster_type = ifelse(colnames(sobj_traj) %in% colnames(sobj_hfsc),
                                yes = "HFSC",
                                no = sobj_traj$cluster_type) %>%
  as.factor()
```

## Figures

Cells on the full atlas :

```{r fig_traj_location, fig.width = 2, fig.height = 2}
sobj$cell_bc = colnames(sobj)
sobj_traj$cell_bc = colnames(sobj_traj)
sobj$is_traj = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
                                sobj_traj@meta.data[, c("cell_bc", "cluster_type")],
                                by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_traj", order = levels(sobj_traj$cluster_type)) +
  ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS", "HFSC")], bg_color),
                              breaks = c("IBL", "ORS", "HFSC", NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() + Seurat::NoLegend()
```


Project name :

```{r fig_traj_project_name, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_traj), replace = FALSE, size = ncol(sobj_traj))

# Extract coordinates
cells_coord = sobj_traj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_type = sobj_traj$sample_type
cells_coord = cells_coord[order(sobj_traj$sample_type), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_type)) +
  ggplot2::geom_point(size = 1.2) +
  ggplot2::scale_color_manual(values = sample_type_colors,
                              breaks = names(sample_type_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```


Cluster type :

```{r fig_traj_cluster_type, fig.width = 3, fig.height = 3}
Seurat::DimPlot(sobj_traj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Pseudotime :

```{r fig_traj_pseudotime, fig.width = 6, fig.height = 4}
Seurat::FeaturePlot(sobj_traj, reduction = name2D, pt.size = 0.5,
                    features = "pseudotime") +
  ggplot2::scale_color_gradientn(colors = viridis::viridis(n = 100)) +
  ggplot2::lims(x = range(sobj_traj@reductions[[name2D]]@cell.embeddings[, 1]),
                y = range(sobj_traj@reductions[[name2D]]@cell.embeddings[, 2])) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes()
```

Pseudotime with dynplot's function :

```{r fig_traj_pseudotime_dynplot, fig.width = 5, fig.height = 5}
dynplot::plot_dimred(trajectory = my_traj,
                     dimred = sobj_traj[[name2D]]@cell.embeddings,
                     # Cells
                     color_cells = 'pseudotime',
                     size_cells = 1.6,
                     border_radius_percentage = 0,
                     # Trajectory
                     plot_trajectory = TRUE,
                     color_trajectory = "none",
                     label_milestones = FALSE,
                     size_milestones = 0,
                     size_transitions = 1)
```

# OEP002321 dataset

## Settings

We load the dataset containing all cells :

```{r load_sobj_wu}
sobj = readRDS(paste0(data_dir, "/5_wu/3_combined/wu_sobj.rds"))
sobj
```

This is the projection name to visualize cells :

```{r all_sobj_name2D_wu}
name2D = "harmony_38_tsne"
name2D_atlas = name2D
```

These are all the samples analyzed :

```{r sample_info_wu, class.source = 'fold-hide', fig.width = 8, fig.height = 3}
sample_info = readRDS(paste0(data_dir, "/5_wu/1_metadata/wu_sample_info.rds"))

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
```

We define cluster type and cluster family :

```{r cluster_type_family_wu, fig.width = 7, fig.height = 5}
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type)) %>%
  base::droplevels()
sobj$cluster_family = custom_order_cell_type[sobj$cluster_type, "cell_family"]
sobj$cluster_family = factor(sobj$cluster_family,
                             levels = names(family_color))
```


## Global figures

Project name :

```{r figs2_sample_identifier, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D_atlas]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj$project_name
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$project_name) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

Cell type annotation :

```{r figs2_cell_type, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Cluster type annotation :

```{r figs2_cluster_type, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

Gene expression to assess annotation :

```{r figs2_gene_family, fig.width = 4, fig.height = 4}
genes = c("PTPRC", "MSX2", "KRT14")
names(genes) = c("immune cells", "matrix cells", "non-matrix cells")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  pop = names(genes)[gene_id]
  
  sobj$my_gene = Seurat::FetchData(sobj, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj, features = "my_gene", reduction = name2D_atlas) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) + 
    # subtitle = pop) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
```

Dotplot :

```{r figs2_dotplot, fig.width = 8, fig.height = 5, class.source = "fold-hide"}
custom_order_cell_type = custom_order_cell_type[levels(sobj$cluster_type), c("cell_type", "cell_family")]

plot_list = Seurat::DotPlot(sobj,
                            features = c("PTPRC",
                                         "CD3E", "CD4",
                                         "CD207", "AIF1",
                                         # "PRDM1", "KRT85",
                                         "MSX2",
                                         "KRT32", "KRT35",
                                         "KRT31", "PRR9",
                                         "BAMBI", "ALDH1A3",
                                         "KRT71", "KRT73",
                                         "TOP2A", "MCM5",
                                         "KRT14", "CXCL14",
                                         "KRT15", "COL17A1",
                                         "DIO2", "TCEAL2",
                                         "KRT16", "KRT6C",
                                         "SPINK5", "LY6D"),
                            group.by = "cluster_type", scale = TRUE,
                            scale.by = "radius", scale.min = NA, scale.max = NA) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "bottom",
                 legend.direction = "vertical",
                 # legend.justification = "bottom",
                 legend.box = "horizontal",
                 legend.box.margin = margin(0,25,0,0),
                 axis.title = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.text.y = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 11, color = "black"),
                 plot.margin = unit(c(0,0.5,0,0), "cm"))

p = ggplot2::ggplot(custom_order_cell_type, aes(y = cell_type, x = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(y = 0.5, yend = 2.5, x = 0, xend = 0), size = 6, col = family_color["immune cells"]) +
  ggplot2::geom_segment(aes(y = 2.5, yend = 7.5, x = 0, xend = 0), size = 6, col = family_color["matrix"]) +
  ggplot2::geom_segment(aes(y = 7.5, yend = 11.5, x = 0, xend = 0), size = 6, col = family_color["non matrix"]) +
  ggplot2::scale_x_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.x = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.title = element_blank(),
                 axis.line.x = element_blank(),
                 axis.text.y = element_text(size = 12, color = "black"),
                 plot.margin = unit(c(0.5,0,0,0.5), "cm"))

plot_list = patchwork::wrap_plots(p, plot_list,
                                  nrow = 1, widths = c(1, 25))
plot_list
```

## IBL and ORS dataset

We load the IBL + ORS dataset :

```{r sobj_iblors_wu}
sobj_iblors = readRDS(paste0(data_dir, "/5_wu/4_ibl_ors/iblmors_sobj.rds"))
sobj_iblors
```


This is the projection name to visualize cells :

```{r sobj_name2D_iblors_wu}
name2D = "harmony_20_tsne"
```

To represent results from differential expression, we load the analyses results :

```{r list_results_iblors_wu}
list_results = readRDS(paste0(data_dir, "/5_wu/4_ibl_ors/iblmors_list_results.rds"))

lapply(list_results, FUN = names)
```

We defined cluster type :

```{r cluster_type_iblors_wu, fig.width = 7, fig.height = 5}
cluster_type = table(sobj_iblors$cell_type, sobj_iblors$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj_iblors$cell_type)[cluster_type])

sobj_iblors$cluster_type = cluster_type[sobj_iblors$seurat_clusters]
sobj_iblors$cluster_type = factor(sobj_iblors$cluster_type,
                                  levels = c("IBL", "ORS"))
```

## IBL + ORS figure

IBL + ORS on the full atlas :

```{r figs2_iblors_location, fig.width = 1.5, fig.height = 1.5}
sobj$cell_bc = colnames(sobj)
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj$is_iblors = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
                                  sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
                                  by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_iblors", order = FALSE) +
  ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS")], bg_color),
                              breaks = c("IBL", "ORS", NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

Cluster type :

```{r figs2_iblors_cluster_type, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```


DE genes between IBL and ORS :

```{r figs2_iblors_de_pop, fig.width = 6, fig.height = 6}
mark = list_results$IBL_vs_ORS$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
  # up-regulated in IBL
  mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
  # up-regulated in ORS
  mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
  # representative and selective for IBL
  mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
  # representative and selective for ORS
  mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
  dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]

avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
                           nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))


ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
  ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
                            aes(x = pct.1, y = pct.2, label = gene_name),
                            col = "black", fill = NA, size = 3.5, label.size = NA) +
  ggplot2::labs(x = "Enriched in IBL",
                y = "Enriched in ORS") +
  ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
                                 values = scales::rescale(unname(avg_logFC_range))) +
  ggplot2::theme_classic() +
  ggplot2::theme(aspect.ratio = 1)
```

GSEA plot :

```{r figs2_iblors_keratinization, fig.width = 6, fig.height = 4}
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

```{r figs2_iblors_ifna, fig.width = 6, fig.height = 4}
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$IBL_vs_ORS$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      " | pvalue : ", round(the_content$pvalue, 4),
                      " | set size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

Genes of interest :

```{r figs2_iblors_genes, fig.width = 2, fig.height = 2}
genes = c("KRT16", "COL17A1", "DST", "KRT6B", "IL1R2", "WNT3",
          "IFI27", "CXCL14", "IGFBP3", "KRT15", "CD200")

plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
  gene = genes[[gene_id]]
  
  sobj_iblors$my_gene = Seurat::FetchData(sobj_iblors, gene)[, 1] %>%
    aquarius::run_rescale(., new_min = 0, new_max = 10)
  
  Seurat::FeaturePlot(sobj_iblors, features = "my_gene", reduction = name2D, pt.size = 0.25) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
                                   breaks = seq(0, 10, by = 2.5),
                                   labels = c("min", rep("", 3), "max")) +
    ggplot2::labs(title = gene) +
    ggplot2::theme(aspect.ratio = 1,
                   plot.title = element_text(hjust = 0.5, size = 17),
                   plot.subtitle = element_text(hjust = 0.5, size = 15),
                   legend.text = element_text(size = 15),
                   legend.position = "none") +
    Seurat::NoAxes()
})

plot_list
```

# Supplementary tables

In this section, we save files to associate with the manuscript, as supplementary tables.

## Table S2 (package version)

We load the table :

```{r ts2_load}
package_version = read.table(paste0(".", "/data/info_to_install_2023_04_17.txt"),
                             header = TRUE)
head(package_version)
```

Columns correspond to :

* **order** : order to install package such that one never need to install dependencies
* **package_name** : package name
* **version** : installed version, on the local machine
* **url** : the package was installed in the Singularity container using this url

We save this table, except the last column (just for me) :

```{r ts2_save}
openxlsx::write.xlsx(package_version[, c(1:4)],
                     file = paste0(".", "/data/Supplementary Table 2.xlsx"))
```

## Table S3 (cell type annotation)

We load the table :

```{r ts3_load}
cell_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
```

We save this table, except the last column (just for me) :

```{r ts3_save}
openxlsx::write.xlsx(cell_markers,
                     file = paste0(".", "/data/Supplementary Table 3.xlsx"))
```


# R Session

```{r sessioninfo, echo = FALSE, fold_output = TRUE}
sessionInfo()
```

